From 6e50b41e9c91ddaa86c1c1d8876944cbdd51f388 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Wed, 12 May 2021 20:04:59 -0400 Subject: [PATCH 01/29] checkpoint --- google/cloud/aiplatform/__init__.py | 3 +- google/cloud/aiplatform/jobs.py | 159 ++++++++++++++++++++++++++-- 2 files changed, 155 insertions(+), 7 deletions(-) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 58eb824454..c631c2fd3c 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -27,7 +27,7 @@ ) from google.cloud.aiplatform.models import Endpoint from google.cloud.aiplatform.models import Model -from google.cloud.aiplatform.jobs import BatchPredictionJob +from google.cloud.aiplatform.jobs import BatchPredictionJob, CustomJob from google.cloud.aiplatform.training_jobs import ( CustomTrainingJob, CustomContainerTrainingJob, @@ -55,6 +55,7 @@ "AutoMLTextTrainingJob", "AutoMLVideoTrainingJob", "BatchPredictionJob", + "CustomJob", "CustomTrainingJob", "CustomContainerTrainingJob", "CustomPythonPackageTrainingJob", diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index ee6d46dde9..29d6073d9c 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -22,29 +22,33 @@ import time import logging + from google.cloud import storage from google.cloud import bigquery from google.auth import credentials as auth_credentials +from google.protobuf import duration_pb2 # type: ignore from google.cloud import aiplatform from google.cloud.aiplatform import base -from google.cloud.aiplatform import initializer from google.cloud.aiplatform import compat from google.cloud.aiplatform import constants +from google.cloud.aiplatform import initializer +from google.cloud.aiplatform import training_jobs from google.cloud.aiplatform import utils from google.cloud.aiplatform.compat.services import job_service_client from google.cloud.aiplatform.compat.types import ( - io as gca_io_compat, - io_v1beta1 as gca_io_v1beta1, - job_state as gca_job_state, batch_prediction_job as gca_bp_job_compat, batch_prediction_job_v1 as gca_bp_job_v1, batch_prediction_job_v1beta1 as gca_bp_job_v1beta1, + custom_job as gca_custom_job_compat, + explanation_v1beta1 as gca_explanation_v1beta1, + io as gca_io_compat, + io_v1beta1 as gca_io_v1beta1, + job_state as gca_job_state, machine_resources as gca_machine_resources_compat, machine_resources_v1beta1 as gca_machine_resources_v1beta1, - explanation_v1beta1 as gca_explanation_v1beta1, ) logging.basicConfig(level=logging.INFO, stream=sys.stdout) @@ -173,7 +177,7 @@ def _block_until_complete(self): ) ) log_wait = min(log_wait * multiplier, max_wait) - previous_time = current_time + previous_time = current_time time.sleep(wait) _LOGGER.log_action_completed_against_resource("", "run", self) @@ -777,6 +781,149 @@ class CustomJob(_Job): _job_type = "training" pass + def __init__(self, + display_name: str, + worker_pool_specs: Union[Dict], + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + encryption_spec_key_name: Optional[str] = None, + staging_bucket: Optional[str] = None): + + base.AiPlatformResourceNounWithFutureManager.__init__(self, + project=project, + location=location, + credentials=credentials + ) + + self._parent = aiplatform.initializer.global_config.common_location_path( + project=project, + location=location + ) + + staging_bucket = staging_bucket or initializer.global_config.staging_bucket + + if 
not staging_bucket:
+            raise RuntimeError(
+                "staging_bucket should be passed to CustomJob constructor or "
+                "should be set using aiplatform.init(staging_bucket='gs://my-bucket')"
+            )
+
+        self._gca_resource = gca_custom_job_compat.CustomJob(
+            display_name=display_name,
+            job_spec = gca_custom_job_compat.CustomJobSpec(
+                worker_pool_specs=worker_pool_specs,
+                base_output_directory=gca_io_compat.GcsDestination(output_uri_prefix=staging_bucket),
+            ),
+            encryption_spec= initializer.global_config.get_encryption_spec(
+                encryption_spec_key_name=encryption_spec_key_name
+            )
+        )
+
+
+    @classmethod
+    def from_local_script(
+            cls,
+            display_name: str,
+            script_path: str,
+            container_uri: str,
+            args: Optional[List[Union[str, float, int]]] = None,
+            requirements: Optional[Sequence[str]] = None,
+            environment_variables: Optional[Dict[str, str]] = None,
+            replica_count: int = 1,
+            machine_type: str = "n1-standard-4",
+            accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED",
+            accelerator_count: int = 0,
+            project: Optional[str] = None,
+            location: Optional[str] = None,
+            staging_bucket: Optional[str]= None,
+            credentials: Optional[auth_credentials.Credentials] = None,
+            encryption_spec_key_name: Optional[str] = None,
+        ) -> 'CustomJob':
+
+        project = project or initializer.global_config.project
+        location = location or initializer.global_config.location
+        staging_bucket = staging_bucket or initializer.global_config.staging_bucket
+
+        if not staging_bucket:
+            raise RuntimeError(
+                "staging_bucket should be passed to CustomJob.from_local_script or "
+                "should be set using aiplatform.init(staging_bucket='gs://my-bucket')"
+            )
+
+        worker_pool_specs = training_jobs._DistributedTrainingSpec.chief_worker_pool(
+            replica_count=replica_count,
+            machine_type=machine_type,
+            accelerator_count=accelerator_count,
+            accelerator_type=accelerator_type,
+        ).pool_specs
+
+
+        python_packager = training_jobs._TrainingScriptPythonPackager(
+            script_path=script_path, requirements=requirements
+        )
+
+        package_gcs_uri = python_packager.package_and_copy_to_gcs(
+            gcs_staging_dir = staging_bucket,
+            project = project,
+            credentials = credentials,
+        )
+
+        for spec in worker_pool_specs:
+            spec["pythonPackageSpec"] = {
+                "executorImageUri": container_uri,
+                "pythonModule": python_packager.module_name,
+                "packageUris": [package_gcs_uri],
+            }
+
+            if args:
+                spec["pythonPackageSpec"]["args"] = args
+
+            if environment_variables:
+                spec["pythonPackageSpec"]["env"] = [
+                    {"name": key, "value": value}
+                    for key, value in environment_variables.items()
+                ]
+
+        return cls(
+            display_name=display_name,
+            worker_pool_specs=worker_pool,
+            project=project,
+            location=location,
+            credentials=credentials,
+            encryption_spec_key_name=encryption_spec_key_name,
+            staging_bucket=staging_bucket)
+
+
+    @base.optional_sync()
+    def run(
+        self,
+        service_account: Optional[str] = None,
+        network: Optional[str] = None,
+        timeout: Optional[int] = None,  # seconds
+        restart_job_on_worker_restart: bool=False,
+        sync: bool = True):
+
+        if service_account:
+            self._gca_resource.service_account = service_account
+
+        if network:
+            self._gca_resource.network = network
+
+
+        if timeout or restart_job_on_worker_restart:
+            timeout = duration_pb2.Duration(seconds=timeout) if timeout else None
+            self._gca_resource.job_spec.scheduling = gca_custom_job_compat.Scheduling(
+                timeout=timeout,
+                restart_job_on_worker_restart=restart_job_on_worker_restart
+            )
+
+        self._gca_resource = self.api_client.create_custom_job(
+            parent=self._parent, custom_job=self._gca_resource
+        )
+
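+        # NOTE: create_custom_job returns as soon as the CustomJob resource is
+        # created; the polling call below blocks until the job reaches a
+        # terminal state.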
self._block_until_complete() + class DataLabelingJob(_Job): _resource_noun = "dataLabelingJobs" From c1dfd629b8691c5122deb635a981e98e7b79c690 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Thu, 13 May 2021 11:02:46 -0400 Subject: [PATCH 02/29] checkpoint --- google/__init__.py | 0 google/cloud/__init__.py | 0 google/cloud/aiplatform/jobs.py | 56 +-- google/cloud/aiplatform/training_jobs.py | 458 +----------------- .../{utils.py => utils/__init__.py} | 67 +++ google/cloud/aiplatform/utils/source_utils.py | 216 +++++++++ .../aiplatform/utils/worker_spec_utils.py | 181 +++++++ tests/unit/aiplatform/test_end_to_end.py | 5 +- tests/unit/aiplatform/test_training_jobs.py | 85 ++-- 9 files changed, 543 insertions(+), 525 deletions(-) create mode 100644 google/__init__.py create mode 100644 google/cloud/__init__.py rename google/cloud/aiplatform/{utils.py => utils/__init__.py} (87%) create mode 100644 google/cloud/aiplatform/utils/source_utils.py create mode 100644 google/cloud/aiplatform/utils/worker_spec_utils.py diff --git a/google/__init__.py b/google/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/google/cloud/__init__.py b/google/cloud/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 29d6073d9c..1d068d3435 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -22,7 +22,6 @@ import time import logging - from google.cloud import storage from google.cloud import bigquery @@ -34,8 +33,9 @@ from google.cloud.aiplatform import compat from google.cloud.aiplatform import constants from google.cloud.aiplatform import initializer -from google.cloud.aiplatform import training_jobs from google.cloud.aiplatform import utils +from google.cloud.aiplatform.utils import source_utils +from google.cloud.aiplatform.utils import worker_spec_utils from google.cloud.aiplatform.compat.services import job_service_client from google.cloud.aiplatform.compat.types import ( @@ -823,23 +823,23 @@ def __init__(self, @classmethod def from_local_script( - cls, - display_name: str, - script_path: str, - container_uri: str, - args: Optional[List[Union[str, float, int]]] = None, - requirements: Optional[Sequence[str]] = None, - environment_variables: Optional[Dict[str, str]] = None, - replica_count: int = 1, - machine_type: str = "n1-standard-4", - accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", - accelerator_count: int = 0, - project: Optional[str] = None, - location: Optional[str] = None, - staging_bucket: Optional[str]= None, - credentials: Optional[auth_credentials.Credentials] = None, - encryption_spec_key_name: Optional[str] = None, - ) -> 'CustomJob': + cls, + display_name: str, + script_path: str, + container_uri: str, + args: Optional[List[Union[str, float, int]]] = None, + requirements: Optional[Sequence[str]] = None, + environment_variables: Optional[Dict[str, str]] = None, + replica_count: int = 1, + machine_type: str = "n1-standard-4", + accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", + accelerator_count: int = 0, + project: Optional[str] = None, + location: Optional[str] = None, + staging_bucket: Optional[str]= None, + credentials: Optional[auth_credentials.Credentials] = None, + encryption_spec_key_name: Optional[str] = None, + ) -> 'CustomJob': project = project or initializer.global_config.project location = location or initializer.global_config.location @@ -851,7 +851,7 @@ def from_local_script( "should be set using 
aiplatform.init(staging_bucket='gs://my-bucket')" ) - worker_pool_specs = training_jobs._DistributedTrainingSpec.chief_worker_pool( + worker_pool_specs = worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -859,7 +859,7 @@ def from_local_script( ).pool_specs - python_packager = training_jobs._TrainingScriptPythonPackager( + python_packager = source_utils._TrainingScriptPythonPackager( script_path=script_path, requirements=requirements ) @@ -870,24 +870,24 @@ def from_local_script( ) for spec in worker_pool_specs: - spec["pythonPackageSpec"] = { - "executorImageUri": container_uri, - "pythonModule": python_packager.module_name, - "packageUris": [package_gcs_uri], + spec["python_package_spec"] = { + "executor_image_uri": container_uri, + "python_module": python_packager.module_name, + "package_uris": [package_gcs_uri], } if args: - spec["pythonPackageSpec"]["args"] = args + spec["python_package_spec"]["args"] = args if environment_variables: - spec["pythonPackageSpec"]["env"] = [ + spec["python_package_spec"]["env"] = [ {"name": key, "value": value} for key, value in environment_variables.items() ] return cls( display_name=display_name, - worker_pool_specs=worker_pool, + worker_pool_specs=worker_pool_specs, project=project, location=location, credentials=credentials, diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 2912806a12..f8f56bd5da 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -15,16 +15,9 @@ # limitations under the License. # -import datetime -import functools -import logging -import pathlib -import shutil -import subprocess import sys -import tempfile import time -from typing import Callable, Dict, List, Optional, NamedTuple, Sequence, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, Union import abc @@ -38,25 +31,25 @@ from google.cloud.aiplatform import utils from google.cloud.aiplatform.compat.types import ( - accelerator_type as gca_accelerator_type, env_var as gca_env_var, io as gca_io, model as gca_model, pipeline_state as gca_pipeline_state, training_pipeline as gca_training_pipeline, ) +from google.cloud.aiplatform.utils import _timestamped_gcs_dir +from google.cloud.aiplatform.utils.source_utils import _TrainingScriptPythonPackager +from google.cloud.aiplatform.utils.worker_spec_utils import _DistributedTrainingSpec from google.cloud.aiplatform.v1.schema.trainingjob import ( definition_v1 as training_job_inputs, ) -from google.cloud import storage from google.rpc import code_pb2 import proto -logging.basicConfig(level=logging.INFO, stream=sys.stdout) _LOGGER = base.Logger(__name__) _PIPELINE_COMPLETE_STATES = set( @@ -780,449 +773,6 @@ def cancel(self) -> None: self.api_client.cancel_training_pipeline(name=self.resource_name) -def _timestamped_gcs_dir(root_gcs_path: str, dir_name_prefix: str) -> str: - """Composes a timestamped GCS directory. - - Args: - root_gcs_path: GCS path to put the timestamped directory. - dir_name_prefix: Prefix to add the timestamped directory. - Returns: - Timestamped gcs directory path in root_gcs_path. 
- """ - timestamp = datetime.datetime.now().isoformat(sep="-", timespec="milliseconds") - dir_name = "-".join([dir_name_prefix, timestamp]) - if root_gcs_path.endswith("/"): - root_gcs_path = root_gcs_path[:-1] - gcs_path = "/".join([root_gcs_path, dir_name]) - if not gcs_path.startswith("gs://"): - return "gs://" + gcs_path - return gcs_path - - -def _timestamped_copy_to_gcs( - local_file_path: str, - gcs_dir: str, - project: Optional[str] = None, - credentials: Optional[auth_credentials.Credentials] = None, -) -> str: - """Copies a local file to a GCS path. - - The file copied to GCS is the name of the local file prepended with an - "aiplatform-{timestamp}-" string. - - Args: - local_file_path (str): Required. Local file to copy to GCS. - gcs_dir (str): - Required. The GCS directory to copy to. - project (str): - Project that contains the staging bucket. Default will be used if not - provided. Model Builder callers should pass this in. - credentials (auth_credentials.Credentials): - Custom credentials to use with bucket. Model Builder callers should pass - this in. - Returns: - gcs_path (str): The path of the copied file in gcs. - """ - - gcs_bucket, gcs_blob_prefix = utils.extract_bucket_and_prefix_from_gcs_path(gcs_dir) - - local_file_name = pathlib.Path(local_file_path).name - timestamp = datetime.datetime.now().isoformat(sep="-", timespec="milliseconds") - blob_path = "-".join(["aiplatform", timestamp, local_file_name]) - - if gcs_blob_prefix: - blob_path = "/".join([gcs_blob_prefix, blob_path]) - - # TODO(b/171202993) add user agent - client = storage.Client(project=project, credentials=credentials) - bucket = client.bucket(gcs_bucket) - blob = bucket.blob(blob_path) - blob.upload_from_filename(local_file_path) - - gcs_path = "".join(["gs://", "/".join([blob.bucket.name, blob.name])]) - return gcs_path - - -def _get_python_executable() -> str: - """Returns Python executable. - - Returns: - Python executable to use for setuptools packaging. - Raises: - EnvironmentError: If Python executable is not found. - """ - - python_executable = sys.executable - - if not python_executable: - raise EnvironmentError("Cannot find Python executable for packaging.") - return python_executable - - -class _TrainingScriptPythonPackager: - """Converts a Python script into Python package suitable for aiplatform - training. - - Copies the script to specified location. - - Class Attributes: - _TRAINER_FOLDER: Constant folder name to build package. - _ROOT_MODULE: Constant root name of module. - _TEST_MODULE_NAME: Constant name of module that will store script. - _SETUP_PY_VERSION: Constant version of this created python package. - _SETUP_PY_TEMPLATE: Constant template used to generate setup.py file. - _SETUP_PY_SOURCE_DISTRIBUTION_CMD: - Constant command to generate the source distribution package. 
- - Attributes: - script_path: local path of script to package - requirements: list of Python dependencies to add to package - - Usage: - - packager = TrainingScriptPythonPackager('my_script.py', ['pandas', 'pytorch']) - gcs_path = packager.package_and_copy_to_gcs( - gcs_staging_dir='my-bucket', - project='my-prject') - module_name = packager.module_name - - The package after installed can be executed as: - python -m aiplatform_custom_trainer_script.task - """ - - _TRAINER_FOLDER = "trainer" - _ROOT_MODULE = "aiplatform_custom_trainer_script" - _TASK_MODULE_NAME = "task" - _SETUP_PY_VERSION = "0.1" - - _SETUP_PY_TEMPLATE = """from setuptools import find_packages -from setuptools import setup - -setup( - name='{name}', - version='{version}', - packages=find_packages(), - install_requires=({requirements}), - include_package_data=True, - description='My training application.' -)""" - - _SETUP_PY_SOURCE_DISTRIBUTION_CMD = "setup.py sdist --formats=gztar" - - # Module name that can be executed during training. ie. python -m - module_name = f"{_ROOT_MODULE}.{_TASK_MODULE_NAME}" - - def __init__(self, script_path: str, requirements: Optional[Sequence[str]] = None): - """Initializes packager. - - Args: - script_path (str): Required. Local path to script. - requirements (Sequence[str]): - List of python packages dependencies of script. - """ - - self.script_path = script_path - self.requirements = requirements or [] - - def make_package(self, package_directory: str) -> str: - """Converts script into a Python package suitable for python module - execution. - - Args: - package_directory (str): Directory to build package in. - Returns: - source_distribution_path (str): Path to built package. - Raises: - RunTimeError: If package creation fails. - """ - # The root folder to builder the package in - package_path = pathlib.Path(package_directory) - - # Root directory of the package - trainer_root_path = package_path / self._TRAINER_FOLDER - - # The root module of the python package - trainer_path = trainer_root_path / self._ROOT_MODULE - - # __init__.py path in root module - init_path = trainer_path / "__init__.py" - - # The module that will contain the script - script_out_path = trainer_path / f"{self._TASK_MODULE_NAME}.py" - - # The path to setup.py in the package. - setup_py_path = trainer_root_path / "setup.py" - - # The path to the generated source distribution. - source_distribution_path = ( - trainer_root_path - / "dist" - / f"{self._ROOT_MODULE}-{self._SETUP_PY_VERSION}.tar.gz" - ) - - trainer_root_path.mkdir() - trainer_path.mkdir() - - # Make empty __init__.py - with init_path.open("w"): - pass - - # Format the setup.py file. - setup_py_output = self._SETUP_PY_TEMPLATE.format( - name=self._ROOT_MODULE, - requirements=",".join(f'"{r}"' for r in self.requirements), - version=self._SETUP_PY_VERSION, - ) - - # Write setup.py - with setup_py_path.open("w") as fp: - fp.write(setup_py_output) - - # Copy script as module of python package. - shutil.copy(self.script_path, script_out_path) - - # Run setup.py to create the source distribution. - setup_cmd = [ - _get_python_executable() - ] + self._SETUP_PY_SOURCE_DISTRIBUTION_CMD.split() - - p = subprocess.Popen( - args=setup_cmd, - cwd=trainer_root_path, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - output, error = p.communicate() - - # Raise informative error if packaging fails. 
- if p.returncode != 0: - raise RuntimeError( - "Packaging of training script failed with code %d\n%s \n%s" - % (p.returncode, output.decode(), error.decode()) - ) - - return str(source_distribution_path) - - def package_and_copy(self, copy_method: Callable[[str], str]) -> str: - """Packages the script and executes copy with given copy_method. - - Args: - copy_method Callable[[str], str] - Takes a string path, copies to a desired location, and returns the - output path location. - Returns: - output_path str: Location of copied package. - """ - - with tempfile.TemporaryDirectory() as tmpdirname: - source_distribution_path = self.make_package(tmpdirname) - output_location = copy_method(source_distribution_path) - _LOGGER.info("Training script copied to:\n%s." % output_location) - return output_location - - def package_and_copy_to_gcs( - self, - gcs_staging_dir: str, - project: str = None, - credentials: Optional[auth_credentials.Credentials] = None, - ) -> str: - """Packages script in Python package and copies package to GCS bucket. - - Args - gcs_staging_dir (str): Required. GCS Staging directory. - project (str): Required. Project where GCS Staging bucket is located. - credentials (auth_credentials.Credentials): - Optional credentials used with GCS client. - Returns: - GCS location of Python package. - """ - - copy_method = functools.partial( - _timestamped_copy_to_gcs, - gcs_dir=gcs_staging_dir, - project=project, - credentials=credentials, - ) - return self.package_and_copy(copy_method=copy_method) - - -class _MachineSpec(NamedTuple): - """Specification container for Machine specs used for distributed training. - - Usage: - - spec = _MachineSpec( - replica_count=10, - machine_type='n1-standard-4', - accelerator_count=2, - accelerator_type='NVIDIA_TESLA_K80') - - Note that container and python package specs are not stored with this spec. - """ - - replica_count: int = 0 - machine_type: str = "n1-standard-4" - accelerator_count: int = 0 - accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED" - - def _get_accelerator_type(self) -> Optional[str]: - """Validates accelerator_type and returns the name of the accelerator. - - Returns: - None if no accelerator or valid accelerator name. - - Raise: - ValueError if accelerator type is invalid. - """ - - # Raises ValueError if invalid accelerator_type - utils.validate_accelerator_type(self.accelerator_type) - - accelerator_enum = getattr( - gca_accelerator_type.AcceleratorType, self.accelerator_type - ) - - if ( - accelerator_enum - != gca_accelerator_type.AcceleratorType.ACCELERATOR_TYPE_UNSPECIFIED - ): - return self.accelerator_type - - @property - def spec_dict(self) -> Dict[str, Union[int, str, Dict[str, Union[int, str]]]]: - """Return specification as a Dict.""" - spec = { - "machineSpec": {"machineType": self.machine_type}, - "replicaCount": self.replica_count, - } - accelerator_type = self._get_accelerator_type() - if accelerator_type and self.accelerator_count: - spec["machineSpec"]["acceleratorType"] = accelerator_type - spec["machineSpec"]["acceleratorCount"] = self.accelerator_count - - return spec - - @property - def is_empty(self) -> bool: - """Returns True is replica_count > 0 False otherwise.""" - return self.replica_count <= 0 - - -class _DistributedTrainingSpec(NamedTuple): - """Configuration for distributed training worker pool specs. 
- - AI Platform Training expects configuration in this order: - [ - chief spec, # can only have one replica - worker spec, - parameter server spec, - evaluator spec - ] - - Usage: - - dist_training_spec = _DistributedTrainingSpec( - chief_spec = _MachineSpec( - replica_count=1, - machine_type='n1-standard-4', - accelerator_count=2, - accelerator_type='NVIDIA_TESLA_K80' - ), - worker_spec = _MachineSpec( - replica_count=10, - machine_type='n1-standard-4', - accelerator_count=2, - accelerator_type='NVIDIA_TESLA_K80' - ) - ) - """ - - chief_spec: _MachineSpec = _MachineSpec() - worker_spec: _MachineSpec = _MachineSpec() - parameter_server_spec: _MachineSpec = _MachineSpec() - evaluator_spec: _MachineSpec = _MachineSpec() - - @property - def pool_specs( - self, - ) -> List[Dict[str, Union[int, str, Dict[str, Union[int, str]]]]]: - """Return each pools spec in correct order for AI Platform as a list of - dicts. - - Also removes specs if they are empty but leaves specs in if there unusual - specifications to not break the ordering in AI Platform Training. - ie. 0 chief replica, 10 worker replica, 3 ps replica - - Returns: - Order list of worker pool specs suitable for AI Platform Training. - """ - if self.chief_spec.replica_count > 1: - raise ValueError("Chief spec replica count cannot be greater than 1.") - - spec_order = [ - self.chief_spec, - self.worker_spec, - self.parameter_server_spec, - self.evaluator_spec, - ] - specs = [s.spec_dict for s in spec_order] - for i in reversed(range(len(spec_order))): - if spec_order[i].is_empty: - specs.pop() - else: - break - return specs - - @classmethod - def chief_worker_pool( - cls, - replica_count: int = 0, - machine_type: str = "n1-standard-4", - accelerator_count: int = 0, - accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", - ) -> "_DistributedTrainingSpec": - """Parameterizes Config to support only chief with worker replicas. - - For replica is assigned to chief and the remainder to workers. All spec have the - same machine type, accelerator count, and accelerator type. - - Args: - replica_count (int): - The number of worker replicas. Assigns 1 chief replica and - replica_count - 1 worker replicas. - machine_type (str): - The type of machine to use for training. - accelerator_type (str): - Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED, - NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, - NVIDIA_TESLA_T4 - accelerator_count (int): - The number of accelerators to attach to a worker replica. - - Returns: - _DistributedTrainingSpec representing one chief and n workers all of same - type. If replica_count <= 0 then an empty spec is returned. 
- """ - if replica_count <= 0: - return cls() - - chief_spec = _MachineSpec( - replica_count=1, - machine_type=machine_type, - accelerator_count=accelerator_count, - accelerator_type=accelerator_type, - ) - - worker_spec = _MachineSpec( - replica_count=replica_count - 1, - machine_type=machine_type, - accelerator_count=accelerator_count, - accelerator_type=accelerator_type, - ) - - return cls(chief_spec=chief_spec, worker_spec=worker_spec) - - class _CustomTrainingJob(_TrainingJob): """ABC for Custom Training Pipelines..""" diff --git a/google/cloud/aiplatform/utils.py b/google/cloud/aiplatform/utils/__init__.py similarity index 87% rename from google/cloud/aiplatform/utils.py rename to google/cloud/aiplatform/utils/__init__.py index ff86fc1cb8..c847a56244 100644 --- a/google/cloud/aiplatform/utils.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -17,6 +17,8 @@ import abc +import datetime +import pathlib from collections import namedtuple import logging import re @@ -25,6 +27,8 @@ from google.api_core import client_options from google.api_core import gapic_v1 from google.auth import credentials as auth_credentials +from google.cloud import storage + from google.cloud.aiplatform import compat from google.cloud.aiplatform import constants from google.cloud.aiplatform import initializer @@ -499,3 +503,66 @@ def __init__(self, warning_level: int): def filter(self, record): return record.levelname == self._warning_level + + +def _timestamped_gcs_dir(root_gcs_path: str, dir_name_prefix: str) -> str: + """Composes a timestamped GCS directory. + + Args: + root_gcs_path: GCS path to put the timestamped directory. + dir_name_prefix: Prefix to add the timestamped directory. + Returns: + Timestamped gcs directory path in root_gcs_path. + """ + timestamp = datetime.datetime.now().isoformat(sep="-", timespec="milliseconds") + dir_name = "-".join([dir_name_prefix, timestamp]) + if root_gcs_path.endswith("/"): + root_gcs_path = root_gcs_path[:-1] + gcs_path = "/".join([root_gcs_path, dir_name]) + if not gcs_path.startswith("gs://"): + return "gs://" + gcs_path + return gcs_path + + +def _timestamped_copy_to_gcs( + local_file_path: str, + gcs_dir: str, + project: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, +) -> str: + """Copies a local file to a GCS path. + + The file copied to GCS is the name of the local file prepended with an + "aiplatform-{timestamp}-" string. + + Args: + local_file_path (str): Required. Local file to copy to GCS. + gcs_dir (str): + Required. The GCS directory to copy to. + project (str): + Project that contains the staging bucket. Default will be used if not + provided. Model Builder callers should pass this in. + credentials (auth_credentials.Credentials): + Custom credentials to use with bucket. Model Builder callers should pass + this in. + Returns: + gcs_path (str): The path of the copied file in gcs. 
+ """ + + gcs_bucket, gcs_blob_prefix = extract_bucket_and_prefix_from_gcs_path(gcs_dir) + + local_file_name = pathlib.Path(local_file_path).name + timestamp = datetime.datetime.now().isoformat(sep="-", timespec="milliseconds") + blob_path = "-".join(["aiplatform", timestamp, local_file_name]) + + if gcs_blob_prefix: + blob_path = "/".join([gcs_blob_prefix, blob_path]) + + # TODO(b/171202993) add user agent + client = storage.Client(project=project, credentials=credentials) + bucket = client.bucket(gcs_bucket) + blob = bucket.blob(blob_path) + blob.upload_from_filename(local_file_path) + + gcs_path = "".join(["gs://", "/".join([blob.bucket.name, blob.name])]) + return gcs_path \ No newline at end of file diff --git a/google/cloud/aiplatform/utils/source_utils.py b/google/cloud/aiplatform/utils/source_utils.py new file mode 100644 index 0000000000..8539e3122d --- /dev/null +++ b/google/cloud/aiplatform/utils/source_utils.py @@ -0,0 +1,216 @@ +import functools +import pathlib +import shutil +import subprocess +import sys +import tempfile +from typing import Optional, Sequence, Callable + +from google.auth import credentials as auth_credentials +from google.cloud.aiplatform import base +from google.cloud.aiplatform import utils + +_LOGGER = base.Logger(__name__) + + +def _get_python_executable() -> str: + """Returns Python executable. + + Returns: + Python executable to use for setuptools packaging. + Raises: + EnvironmentError: If Python executable is not found. + """ + + python_executable = sys.executable + + if not python_executable: + raise EnvironmentError("Cannot find Python executable for packaging.") + return python_executable + + +class _TrainingScriptPythonPackager: + """Converts a Python script into Python package suitable for aiplatform + training. + + Copies the script to specified location. + + Class Attributes: + _TRAINER_FOLDER: Constant folder name to build package. + _ROOT_MODULE: Constant root name of module. + _TEST_MODULE_NAME: Constant name of module that will store script. + _SETUP_PY_VERSION: Constant version of this created python package. + _SETUP_PY_TEMPLATE: Constant template used to generate setup.py file. + _SETUP_PY_SOURCE_DISTRIBUTION_CMD: + Constant command to generate the source distribution package. + + Attributes: + script_path: local path of script to package + requirements: list of Python dependencies to add to package + + Usage: + + packager = TrainingScriptPythonPackager('my_script.py', ['pandas', 'pytorch']) + gcs_path = packager.package_and_copy_to_gcs( + gcs_staging_dir='my-bucket', + project='my-prject') + module_name = packager.module_name + + The package after installed can be executed as: + python -m aiplatform_custom_trainer_script.task + """ + + _TRAINER_FOLDER = "trainer" + _ROOT_MODULE = "aiplatform_custom_trainer_script" + _TASK_MODULE_NAME = "task" + _SETUP_PY_VERSION = "0.1" + + _SETUP_PY_TEMPLATE = """from setuptools import find_packages +from setuptools import setup + +setup( + name='{name}', + version='{version}', + packages=find_packages(), + install_requires=({requirements}), + include_package_data=True, + description='My training application.' +)""" + + _SETUP_PY_SOURCE_DISTRIBUTION_CMD = "setup.py sdist --formats=gztar" + + # Module name that can be executed during training. ie. python -m + module_name = f"{_ROOT_MODULE}.{_TASK_MODULE_NAME}" + + def __init__(self, script_path: str, requirements: Optional[Sequence[str]] = None): + """Initializes packager. + + Args: + script_path (str): Required. Local path to script. 
+ requirements (Sequence[str]): + List of python packages dependencies of script. + """ + + self.script_path = script_path + self.requirements = requirements or [] + + def make_package(self, package_directory: str) -> str: + """Converts script into a Python package suitable for python module + execution. + + Args: + package_directory (str): Directory to build package in. + Returns: + source_distribution_path (str): Path to built package. + Raises: + RunTimeError: If package creation fails. + """ + # The root folder to builder the package in + package_path = pathlib.Path(package_directory) + + # Root directory of the package + trainer_root_path = package_path / self._TRAINER_FOLDER + + # The root module of the python package + trainer_path = trainer_root_path / self._ROOT_MODULE + + # __init__.py path in root module + init_path = trainer_path / "__init__.py" + + # The module that will contain the script + script_out_path = trainer_path / f"{self._TASK_MODULE_NAME}.py" + + # The path to setup.py in the package. + setup_py_path = trainer_root_path / "setup.py" + + # The path to the generated source distribution. + source_distribution_path = ( + trainer_root_path + / "dist" + / f"{self._ROOT_MODULE}-{self._SETUP_PY_VERSION}.tar.gz" + ) + + trainer_root_path.mkdir() + trainer_path.mkdir() + + # Make empty __init__.py + with init_path.open("w"): + pass + + # Format the setup.py file. + setup_py_output = self._SETUP_PY_TEMPLATE.format( + name=self._ROOT_MODULE, + requirements=",".join(f'"{r}"' for r in self.requirements), + version=self._SETUP_PY_VERSION, + ) + + # Write setup.py + with setup_py_path.open("w") as fp: + fp.write(setup_py_output) + + # Copy script as module of python package. + shutil.copy(self.script_path, script_out_path) + + # Run setup.py to create the source distribution. + setup_cmd = [ + _get_python_executable() + ] + self._SETUP_PY_SOURCE_DISTRIBUTION_CMD.split() + + p = subprocess.Popen( + args=setup_cmd, + cwd=trainer_root_path, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + output, error = p.communicate() + + # Raise informative error if packaging fails. + if p.returncode != 0: + raise RuntimeError( + "Packaging of training script failed with code %d\n%s \n%s" + % (p.returncode, output.decode(), error.decode()) + ) + + return str(source_distribution_path) + + def package_and_copy(self, copy_method: Callable[[str], str]) -> str: + """Packages the script and executes copy with given copy_method. + + Args: + copy_method Callable[[str], str] + Takes a string path, copies to a desired location, and returns the + output path location. + Returns: + output_path str: Location of copied package. + """ + + with tempfile.TemporaryDirectory() as tmpdirname: + source_distribution_path = self.make_package(tmpdirname) + output_location = copy_method(source_distribution_path) + _LOGGER.info("Training script copied to:\n%s." % output_location) + return output_location + + def package_and_copy_to_gcs( + self, + gcs_staging_dir: str, + project: str = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> str: + """Packages script in Python package and copies package to GCS bucket. + + Args + gcs_staging_dir (str): Required. GCS Staging directory. + project (str): Required. Project where GCS Staging bucket is located. + credentials (auth_credentials.Credentials): + Optional credentials used with GCS client. + Returns: + GCS location of Python package. 
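+
+        Example (illustrative; the timestamp component varies at runtime):
+            packager.package_and_copy_to_gcs(gcs_staging_dir="gs://my-bucket/staging")
+            # -> "gs://my-bucket/staging/aiplatform-{timestamp}-aiplatform_custom_trainer_script-0.1.tar.gz"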
+ """ + + copy_method = functools.partial( + utils._timestamped_copy_to_gcs, + gcs_dir=gcs_staging_dir, + project=project, + credentials=credentials, + ) + return self.package_and_copy(copy_method=copy_method) \ No newline at end of file diff --git a/google/cloud/aiplatform/utils/worker_spec_utils.py b/google/cloud/aiplatform/utils/worker_spec_utils.py new file mode 100644 index 0000000000..a23b997f48 --- /dev/null +++ b/google/cloud/aiplatform/utils/worker_spec_utils.py @@ -0,0 +1,181 @@ +from typing import NamedTuple, Optional, Dict, Union, List + +from google.cloud.aiplatform import utils +from google.cloud.aiplatform.compat.types import accelerator_type as gca_accelerator_type_compat + + +class _MachineSpec(NamedTuple): + """Specification container for Machine specs used for distributed training. + + Usage: + + spec = _MachineSpec( + replica_count=10, + machine_type='n1-standard-4', + accelerator_count=2, + accelerator_type='NVIDIA_TESLA_K80') + + Note that container and python package specs are not stored with this spec. + """ + + replica_count: int = 0 + machine_type: str = "n1-standard-4" + accelerator_count: int = 0 + accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED" + + def _get_accelerator_type(self) -> Optional[str]: + """Validates accelerator_type and returns the name of the accelerator. + + Returns: + None if no accelerator or valid accelerator name. + + Raise: + ValueError if accelerator type is invalid. + """ + + # Raises ValueError if invalid accelerator_type + utils.validate_accelerator_type(self.accelerator_type) + + accelerator_enum = getattr( + gca_accelerator_type_compat.AcceleratorType, self.accelerator_type + ) + + if ( + accelerator_enum + != gca_accelerator_type_compat.AcceleratorType.ACCELERATOR_TYPE_UNSPECIFIED + ): + return self.accelerator_type + + @property + def spec_dict(self) -> Dict[str, Union[int, str, Dict[str, Union[int, str]]]]: + """Return specification as a Dict.""" + spec = { + "machine_spec": {"machine_type": self.machine_type}, + "replica_count": self.replica_count, + } + accelerator_type = self._get_accelerator_type() + if accelerator_type and self.accelerator_count: + spec["machine_spec"]["accelerator_type"] = accelerator_type + spec["machine_spec"]["accelerator_count"] = self.accelerator_count + + return spec + + @property + def is_empty(self) -> bool: + """Returns True is replica_count > 0 False otherwise.""" + return self.replica_count <= 0 + + +class _DistributedTrainingSpec(NamedTuple): + """Configuration for distributed training worker pool specs. + + AI Platform Training expects configuration in this order: + [ + chief spec, # can only have one replica + worker spec, + parameter server spec, + evaluator spec + ] + + Usage: + + dist_training_spec = _DistributedTrainingSpec( + chief_spec = _MachineSpec( + replica_count=1, + machine_type='n1-standard-4', + accelerator_count=2, + accelerator_type='NVIDIA_TESLA_K80' + ), + worker_spec = _MachineSpec( + replica_count=10, + machine_type='n1-standard-4', + accelerator_count=2, + accelerator_type='NVIDIA_TESLA_K80' + ) + ) + """ + + chief_spec: _MachineSpec = _MachineSpec() + worker_spec: _MachineSpec = _MachineSpec() + parameter_server_spec: _MachineSpec = _MachineSpec() + evaluator_spec: _MachineSpec = _MachineSpec() + + @property + def pool_specs( + self, + ) -> List[Dict[str, Union[int, str, Dict[str, Union[int, str]]]]]: + """Return each pools spec in correct order for AI Platform as a list of + dicts. 
+ + Also removes specs if they are empty but leaves specs in if there unusual + specifications to not break the ordering in AI Platform Training. + ie. 0 chief replica, 10 worker replica, 3 ps replica + + Returns: + Order list of worker pool specs suitable for AI Platform Training. + """ + if self.chief_spec.replica_count > 1: + raise ValueError("Chief spec replica count cannot be greater than 1.") + + spec_order = [ + self.chief_spec, + self.worker_spec, + self.parameter_server_spec, + self.evaluator_spec, + ] + specs = [s.spec_dict for s in spec_order] + for i in reversed(range(len(spec_order))): + if spec_order[i].is_empty: + specs.pop() + else: + break + return specs + + @classmethod + def chief_worker_pool( + cls, + replica_count: int = 0, + machine_type: str = "n1-standard-4", + accelerator_count: int = 0, + accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", + ) -> "_DistributedTrainingSpec": + """Parameterizes Config to support only chief with worker replicas. + + For replica is assigned to chief and the remainder to workers. All spec have the + same machine type, accelerator count, and accelerator type. + + Args: + replica_count (int): + The number of worker replicas. Assigns 1 chief replica and + replica_count - 1 worker replicas. + machine_type (str): + The type of machine to use for training. + accelerator_type (str): + Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED, + NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, + NVIDIA_TESLA_T4 + accelerator_count (int): + The number of accelerators to attach to a worker replica. + + Returns: + _DistributedTrainingSpec representing one chief and n workers all of same + type. If replica_count <= 0 then an empty spec is returned. + """ + if replica_count <= 0: + return cls() + + chief_spec = _MachineSpec( + replica_count=1, + machine_type=machine_type, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type, + ) + + worker_spec = _MachineSpec( + replica_count=replica_count - 1, + machine_type=machine_type, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type, + ) + + return cls(chief_spec=chief_spec, worker_spec=worker_spec) \ No newline at end of file diff --git a/tests/unit/aiplatform/test_end_to_end.py b/tests/unit/aiplatform/test_end_to_end.py index 69c5517a69..f4b1355679 100644 --- a/tests/unit/aiplatform/test_end_to_end.py +++ b/tests/unit/aiplatform/test_end_to_end.py @@ -19,6 +19,7 @@ from importlib import reload +import google.cloud.aiplatform.utils.source_utils from google.cloud import aiplatform from google.cloud.aiplatform import initializer from google.cloud.aiplatform import models @@ -212,7 +213,7 @@ def test_dataset_create_to_model_predict( }, "pythonPackageSpec": { "executorImageUri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, @@ -393,7 +394,7 @@ def test_dataset_create_to_model_predict_with_pipeline_fail( }, "pythonPackageSpec": { "executorImageUri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, 
}, diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index 8fd82c7727..8f783edc15 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -30,6 +30,9 @@ from google.auth import credentials as auth_credentials +import google.cloud.aiplatform.utils +import google.cloud.aiplatform.utils.source_utils +import google.cloud.aiplatform.utils.worker_spec_utils from google.cloud import aiplatform from google.cloud.aiplatform import datasets @@ -234,7 +237,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client_with_bucket( mock_client_bucket, mock_blob = mock_client_bucket - gcs_path = training_jobs._timestamped_copy_to_gcs( + gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs( local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH, gcs_dir=_TEST_BUCKET_NAME, project=_TEST_PROJECT, @@ -261,7 +264,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client_with_gcs_path( mock_client_bucket, mock_blob = mock_client_bucket - gcs_path = training_jobs._timestamped_copy_to_gcs( + gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs( local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH, gcs_dir=_TEST_GCS_PATH_WITH_TRAILING_SLASH, project=_TEST_PROJECT, @@ -289,7 +292,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client_with_trailing_slash( mock_client_bucket, mock_blob = mock_client_bucket - gcs_path = training_jobs._timestamped_copy_to_gcs( + gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs( local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH, gcs_dir=_TEST_GCS_PATH, project=_TEST_PROJECT, @@ -315,7 +318,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client(self, mock_client_bucket): mock_client_bucket, mock_blob = mock_client_bucket - gcs_path = training_jobs._timestamped_copy_to_gcs( + gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs( local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH, gcs_dir=_TEST_BUCKET_NAME, project=_TEST_PROJECT, @@ -332,10 +335,10 @@ def test_timestamp_copy_to_gcs_calls_gcs_client(self, mock_client_bucket): def test_get_python_executable_raises_if_None(self): with patch.object(sys, "executable", new=None): with pytest.raises(EnvironmentError): - training_jobs._get_python_executable() + google.cloud.aiplatform.utils.source_utils._get_python_executable() def test_get_python_executable_returns_python_executable(self): - assert "python" in training_jobs._get_python_executable().lower() + assert "python" in google.cloud.aiplatform.utils.source_utils._get_python_executable().lower() class TestTrainingScriptPythonPackager: @@ -347,7 +350,7 @@ def setup_method(self): def teardown_method(self): pathlib.Path(_TEST_LOCAL_SCRIPT_FILE_NAME).unlink() - python_package_file = f"{training_jobs._TrainingScriptPythonPackager._ROOT_MODULE}-{training_jobs._TrainingScriptPythonPackager._SETUP_PY_VERSION}.tar.gz" + python_package_file = f"{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._ROOT_MODULE}-{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._SETUP_PY_VERSION}.tar.gz" if pathlib.Path(python_package_file).is_file(): pathlib.Path(python_package_file).unlink() subprocess.check_output( @@ -355,34 +358,34 @@ def teardown_method(self): "pip3", "uninstall", "-y", - training_jobs._TrainingScriptPythonPackager._ROOT_MODULE, + google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._ROOT_MODULE, ] ) def test_packager_creates_and_copies_python_package(self): - tsp = 
training_jobs._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) + tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) tsp.package_and_copy(copy_method=local_copy_method) assert pathlib.Path( f"{tsp._ROOT_MODULE}-{tsp._SETUP_PY_VERSION}.tar.gz" ).is_file() def test_created_package_module_is_installable_and_can_be_run(self): - tsp = training_jobs._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) + tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) source_dist_path = tsp.package_and_copy(copy_method=local_copy_method) subprocess.check_output(["pip3", "install", source_dist_path]) module_output = subprocess.check_output( - [training_jobs._get_python_executable(), "-m", tsp.module_name] + [google.cloud.aiplatform.utils.source_utils._get_python_executable(), "-m", tsp.module_name] ) assert "hello world" in module_output.decode() def test_requirements_are_in_package(self): - tsp = training_jobs._TrainingScriptPythonPackager( + tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager( _TEST_LOCAL_SCRIPT_FILE_NAME, requirements=_TEST_REQUIREMENTS ) source_dist_path = tsp.package_and_copy(copy_method=local_copy_method) with tarfile.open(source_dist_path) as tf: with tempfile.TemporaryDirectory() as tmpdirname: - setup_py_path = f"{training_jobs._TrainingScriptPythonPackager._ROOT_MODULE}-{training_jobs._TrainingScriptPythonPackager._SETUP_PY_VERSION}/setup.py" + setup_py_path = f"{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._ROOT_MODULE}-{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._SETUP_PY_VERSION}/setup.py" tf.extract(setup_py_path, path=tmpdirname) setup_py = core.run_setup( pathlib.Path(tmpdirname, setup_py_path), stop_after="init" @@ -395,7 +398,7 @@ def test_packaging_fails_whith_RuntimeError(self): mock_subprocess.communicate.return_value = (b"", b"") mock_subprocess.returncode = 1 mock_popen.return_value = mock_subprocess - tsp = training_jobs._TrainingScriptPythonPackager( + tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager( _TEST_LOCAL_SCRIPT_FILE_NAME ) with pytest.raises(RuntimeError): @@ -404,7 +407,7 @@ def test_packaging_fails_whith_RuntimeError(self): def test_package_and_copy_to_gcs_copies_to_gcs(self, mock_client_bucket): mock_client_bucket, mock_blob = mock_client_bucket - tsp = training_jobs._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) + tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) gcs_path = tsp.package_and_copy_to_gcs( gcs_staging_dir=_TEST_BUCKET_NAME, project=_TEST_PROJECT @@ -512,7 +515,7 @@ def mock_model_service_get(): @pytest.fixture def mock_python_package_to_gcs(): with mock.patch.object( - training_jobs._TrainingScriptPythonPackager, "package_and_copy_to_gcs" + google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager, "package_and_copy_to_gcs" ) as mock_package_to_copy_gcs: mock_package_to_copy_gcs.return_value = _TEST_OUTPUT_PYTHON_PACKAGE_PATH yield mock_package_to_copy_gcs @@ -638,7 +641,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": 
[_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -797,7 +800,7 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1072,7 +1075,7 @@ def test_run_call_pipeline_service_create_with_no_dataset( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1324,7 +1327,7 @@ def test_run_call_pipeline_service_create_distributed_training( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1339,7 +1342,7 @@ def test_run_call_pipeline_service_create_distributed_training( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1552,7 +1555,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": training_jobs._TrainingScriptPythonPackager.module_name, + "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, @@ -2689,7 +2692,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_raises_if_anno class Test_MachineSpec: def test_machine_spec_return_spec_dict(self): - test_spec = training_jobs._MachineSpec( + test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2708,7 +2711,7 @@ def test_machine_spec_return_spec_dict(self): assert test_spec.spec_dict == true_spec_dict def test_machine_spec_return_spec_dict_with_no_accelerator(self): - test_spec = training_jobs._MachineSpec( + test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=0, @@ -2723,7 +2726,7 @@ def test_machine_spec_return_spec_dict_with_no_accelerator(self): assert test_spec.spec_dict == true_spec_dict def test_machine_spec_spec_dict_raises_invalid_accelerator(self): - test_spec = training_jobs._MachineSpec( + test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2734,7 +2737,7 @@ def test_machine_spec_spec_dict_raises_invalid_accelerator(self): test_spec.spec_dict def 
test_machine_spec_spec_dict_is_empty(self): - test_spec = training_jobs._MachineSpec( + test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=0, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2744,7 +2747,7 @@ def test_machine_spec_spec_dict_is_empty(self): assert test_spec.is_empty def test_machine_spec_spec_dict_is_not_empty(self): - test_spec = training_jobs._MachineSpec( + test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2757,26 +2760,26 @@ def test_machine_spec_spec_dict_is_not_empty(self): class Test_DistributedTrainingSpec: def test_machine_spec_returns_pool_spec(self): - spec = training_jobs._DistributedTrainingSpec( - chief_spec=training_jobs._MachineSpec( + spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec( + chief_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - worker_spec=training_jobs._MachineSpec( + worker_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=10, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - parameter_server_spec=training_jobs._MachineSpec( + parameter_server_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=3, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - evaluator_spec=training_jobs._MachineSpec( + evaluator_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2823,7 +2826,7 @@ def test_machine_spec_returns_pool_spec(self): def test_chief_worker_pool_returns_spec(self): - chief_worker_spec = training_jobs._DistributedTrainingSpec.chief_worker_pool( + chief_worker_spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=10, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2853,7 +2856,7 @@ def test_chief_worker_pool_returns_spec(self): def test_chief_worker_pool_returns_just_chief(self): - chief_worker_spec = training_jobs._DistributedTrainingSpec.chief_worker_pool( + chief_worker_spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2875,8 +2878,8 @@ def test_chief_worker_pool_returns_just_chief(self): def test_machine_spec_raise_with_more_than_one_chief_replica(self): - spec = training_jobs._DistributedTrainingSpec( - chief_spec=training_jobs._MachineSpec( + spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec( + chief_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=2, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2889,21 +2892,21 @@ def test_machine_spec_raise_with_more_than_one_chief_replica(self): def test_machine_spec_handles_missing_pools(self): - spec = training_jobs._DistributedTrainingSpec( - chief_spec=training_jobs._MachineSpec( + spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec( + 
chief_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - worker_spec=training_jobs._MachineSpec(replica_count=0), - parameter_server_spec=training_jobs._MachineSpec( + worker_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec(replica_count=0), + parameter_server_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( replica_count=3, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - evaluator_spec=training_jobs._MachineSpec(replica_count=0), + evaluator_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec(replica_count=0), ) true_pool_spec = [ From 3178b11fe769306f09315732c1a1e175e05a8054 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 09:16:21 -0400 Subject: [PATCH 03/29] checkpoint --- google/cloud/aiplatform/jobs.py | 114 ++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 7 deletions(-) diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 1d068d3435..535e0fb9dc 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -47,6 +47,7 @@ io as gca_io_compat, io_v1beta1 as gca_io_v1beta1, job_state as gca_job_state, + hyperparameter_tuning_job as gca_hyperparameter_tuning_job_compat, machine_resources as gca_machine_resources_compat, machine_resources_v1beta1 as gca_machine_resources_v1beta1, ) @@ -925,6 +926,13 @@ def run( self._block_until_complete() + @property + def worker_pool_specs(self): + return self._gca_resource.job_spec.worker_pool_specs + + + + class DataLabelingJob(_Job): _resource_noun = "dataLabelingJobs" _getter_method = "get_data_labeling_job" @@ -935,10 +943,102 @@ class DataLabelingJob(_Job): pass -class HyperparameterTuningJob(_Job): - _resource_noun = "hyperparameterTuningJobs" - _getter_method = "get_hyperparameter_tuning_job" - _list_method = "list_hyperparameter_tuning_jobs" - _cancel_method = "cancel_hyperparameter_tuning_job" - _delete_method = "delete_hyperparameter_tuning_job" - pass +# class HyperparameterTuningJob(_Job): +# _resource_noun = "hyperparameterTuningJobs" +# _getter_method = "get_hyperparameter_tuning_job" +# _list_method = "list_hyperparameter_tuning_jobs" +# _cancel_method = "cancel_hyperparameter_tuning_job" +# _delete_method = "delete_hyperparameter_tuning_job" + + +# def __init__(self, +# display_name: str, +# custom_job: CustomJob, +# metric_spec: Dict[str, str], +# parameter_spec: dict[str, hyperparameter_tuning_job.Parameter], +# max_trial_count: int, +# parallel_trial_count: int, +# max_failed_trials_count: int = 0, +# observation_noise: Optional[str] = 'low', +# algorithm: Optional[str] = 'random', +# measurement_selection: Optional[str] = 'best', +# # project: Optional[str] = None, +# # location: Optional[str] = None, +# # credentials: Optional[auth_credentials.Credentials] = None, +# encryption_spec_key_name: Optional[str] = None, +# # staging_bucket: Optional[str] = None +# ): +# base.AiPlatformResourceNounWithFutureManager.__init__(self, +# project=custom_job.project, +# location=custom_job.location, +# credentials=custom_job.credentials +# ) + +# metrics = [ +# gca_study.StudySpec.MetricSpec(metric_id=metric_id, goal=goal.upper()) +# for metric_id, goal in metric_spec_dict.items() +# ] + +# parameters = [ +# parameter.to_parameter_spec(parameter_id=parameter_id) +# for 
parameter_id, parameter in parameter_spec.items() +# ] + +# study_spec = gca_study_compat.StudySpec( +# metrics = metrics, +# parameters = p +# algorithm + + +# ) + +# self._gca_resource = gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob( +# display_name=display_name, +# study_spec=, +# max_trial_count=max_trial_count, +# parallel_trial_count=parallel_trial_count, +# max_failed_trial_count=max_failed_trial_count, +# trial_job_spec=custom_job.job_spec._gca_resource.copy(), +# encryption_spec= initializer.global_config.get_encryption_spec( +# encryption_spec_key_name = encryption_spec_key_name +# ) +# ) + +# @staticmethod +# def _convert_metric_spec_dict_to_metric_spec( +# metric_spec_dict: Dict[str, str]) -> gca_study_compat.StudySpec.MetricSpec: +# return + + +# @base.optional_sync() +# def run( +# self, +# service_account: Optional[str] = None, +# network: Optional[str] = None, +# timeout: Optional[int] = None, # seconds +# restart_job_on_worker_restart: bool=False, +# sync: bool = True): + +# if service_account: +# self._gca_resource.trial_job_spec.service_account = service_account + +# if network: +# self._gca_resource.trial_job_spec.network = network + + +# if timeout or restart_job_on_worker_restart: +# timout = duration_pb2.Duration(seconds=timout) if timeout else None +# self._gca_resource.trial_job_spec.scheduling = gca_custom_job_compat.Scheduling( +# timeout=timeout, +# restart_job_on_worker_restart=restart_job_on_worker_restart +# ) + +# self._gca_resource = self.api_client.create_hyperparameter_tuning_job( +# parent=self._parent, +# hyperparameter_tuning_job=self._gca_resource +# ) + +# self._block_until_complete() + + + From a6fe1d7e0ff20d4bb01084733b3f051901dd0f2a Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 11:48:39 -0400 Subject: [PATCH 04/29] checkpoint --- google/cloud/aiplatform/__init__.py | 7 +- google/cloud/aiplatform/compat/__init__.py | 2 + .../cloud/aiplatform/compat/types/__init__.py | 2 + .../cloud/aiplatform/hyperparameter_tuning.py | 152 ++++++++++++ google/cloud/aiplatform/jobs.py | 227 ++++++++++-------- 5 files changed, 290 insertions(+), 100 deletions(-) create mode 100644 google/cloud/aiplatform/hyperparameter_tuning.py diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 70b90b39fd..36edec3960 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -26,9 +26,11 @@ TimeSeriesDataset, VideoDataset, ) +from google.cloud.aiplatform import hyperparameter_tuning +from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.models import Endpoint from google.cloud.aiplatform.models import Model -from google.cloud.aiplatform.jobs import BatchPredictionJob, CustomJob +from google.cloud.aiplatform.jobs import BatchPredictionJob, CustomJob, HyperparameterTuningJob from google.cloud.aiplatform.training_jobs import ( CustomTrainingJob, CustomContainerTrainingJob, @@ -39,7 +41,6 @@ AutoMLTextTrainingJob, AutoMLVideoTrainingJob, ) -from google.cloud.aiplatform.metadata import metadata """ Usage: @@ -60,6 +61,7 @@ "explain", "gapic", "init", + "hyperparameter_tuning" "log_params", "log_metrics", "get_experiment_df", @@ -77,6 +79,7 @@ "CustomPythonPackageTrainingJob", "Endpoint", "ImageDataset", + "HyperparameterTuningJob", "Model", "TabularDataset", "TextDataset", diff --git a/google/cloud/aiplatform/compat/__init__.py b/google/cloud/aiplatform/compat/__init__.py index 980c554fe1..55a72fea16 100644 --- 
a/google/cloud/aiplatform/compat/__init__.py +++ b/google/cloud/aiplatform/compat/__init__.py @@ -70,6 +70,7 @@ types.prediction_service = types.prediction_service_v1beta1 types.specialist_pool = types.specialist_pool_v1beta1 types.specialist_pool_service = types.specialist_pool_service_v1beta1 + types.study = types.study_v1beta1 types.training_pipeline = types.training_pipeline_v1beta1 types.metadata_service = types.metadata_service_v1beta1 types.tensorboard_service = types.tensorboard_service_v1beta1 @@ -120,6 +121,7 @@ types.prediction_service = types.prediction_service_v1 types.specialist_pool = types.specialist_pool_v1 types.specialist_pool_service = types.specialist_pool_service_v1 + types.study = types.study_v1 types.training_pipeline = types.training_pipeline_v1 __all__ = ( diff --git a/google/cloud/aiplatform/compat/types/__init__.py b/google/cloud/aiplatform/compat/types/__init__.py index f45bb2e11e..7bd512e7e8 100644 --- a/google/cloud/aiplatform/compat/types/__init__.py +++ b/google/cloud/aiplatform/compat/types/__init__.py @@ -49,6 +49,7 @@ prediction_service as prediction_service_v1beta1, specialist_pool as specialist_pool_v1beta1, specialist_pool_service as specialist_pool_service_v1beta1, + study as study_v1beta1, training_pipeline as training_pipeline_v1beta1, metadata_service as metadata_service_v1beta1, tensorboard_service as tensorboard_service_v1beta1, @@ -90,6 +91,7 @@ prediction_service as prediction_service_v1, specialist_pool as specialist_pool_v1, specialist_pool_service as specialist_pool_service_v1, + study as study_v1, training_pipeline as training_pipeline_v1, ) diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py new file mode 100644 index 0000000000..045e3f955e --- /dev/null +++ b/google/cloud/aiplatform/hyperparameter_tuning.py @@ -0,0 +1,152 @@ +import abc +from typing import Dict, List, Optional, Tuple, Union + +import proto + +from google.cloud.aiplatform.compat.types import study as gca_study_compat + +_scale_type_map = { + 'linear': gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE, + 'log': gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LOG_SCALE, + 'reverse_log': gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_REVERSE_LOG_SCALE, +} + + +class _ParameterSpec(metaclass=abc.ABCMeta): + + def __init__( + self, + conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None, + parent_values: Optional[List[Union[float, int, str]]] = None): + + self.conditional_parameter_spec = conditional_parameter_spec + self.parent_values = parent_values + + @property + @classmethod + @abc.abstractmethod + def _proto_parameter_value_class(self) -> proto.Message: + pass + + @property + @classmethod + @abc.abstractmethod + def _parameter_value_map(self) -> Tuple[Tuple[str, str]]: + pass + + @property + @classmethod + @abc.abstractmethod + def _parameter_spec_value_key(self) -> Tuple[Tuple[str, str]]: + pass + + + @property + def _proto_parameter_value_spec(self) -> proto.Message: + proto_parameter_value_spec = self._proto_parameter_value_class() + for self_attr_key, proto_attr_key in self._parameter_value_map: + setattr(proto_parameter_value_spec, proto_attr_key, getattr(self, self_attr_key)) + return proto_parameter_value_spec + + + def _to_parameter_spec(self, parameter_id: str) -> gca_study_compat.StudySpec.ParameterSpec: + # TODO: Conditional parameters + parameter_spec = gca_study_compat.StudySpec.ParameterSpec( + parameter_id=parameter_id, + 
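+            # NOTE: 'scale' is only set by the numeric and discrete specs
+            # below; CategoricalValueSpec defines no 'scale' attribute, so
+            # this two-argument getattr would raise AttributeError for it.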
scale_type=_scale_type_map.get(getattr(self, 'scale')) + ) + + setattr(parameter_spec, self._parameter_spec_value_key, self._proto_parameter_value_spec) + + return parameter_spec + + +class DoubleParameterSpec(_ParameterSpec): + + _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.DoubleValueSpec + _parameter_value_map = (('min', 'min_value'), ('max', 'max_value')) + _parameter_spec_value_key = 'double_value_spec' + + def __init__( + self, + min: float, + max: float, + scale: str, + conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None, + parent_values: Optional[List[Union[float, int, str]]] = None + ): + + super().__init__( + conditional_parameter_spec=conditional_parameter_spec, + parent_values=parent_values) + + self.min = min + self.max = max + self.scale=scale + + +class IntegerParameterSpec(_ParameterSpec): + + _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.IntegerValueSpec + _parameter_value_map = (('min', 'min_value'), ('max', 'max_value')) + _parameter_spec_value_key = 'integer_value_spec' + + def __init__( + self, + min: int, + max: int, + scale: str, + conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None, + parent_values: Optional[List[Union[float, int, str]]] = None + ): + + super().__init__( + conditional_parameter_spec=conditional_parameter_spec, + parent_value=parent_values) + + self.min = min + self.max = max, + self.scale=scale + +class CategoricalValueSpec(_ParameterSpec): + + _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.CategoricalValueSpec + _parameter_value_map = (('values', 'values')) + _parameter_spec_value_key = 'categorical_value_spec' + + def __init__( + self, + values: List[str], + conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None, + parent_values: Optional[List[Union[float, int, str]]] = None + ): + + super().__init__( + conditional_parameter_spec=conditional_parameter_spec, + parent_value=parent_values) + + self.values = values + + +class DiscreteValueSpec(_ParameterSpec): + + _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.DiscreteValueSpec + _parameter_value_map = (('values', 'values')) + _parameter_spec_value_key = 'discrete_value_spec' + + def __init__( + self, + values: List[float], + scale: str, + conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None, + parent_values: Optional[List[Union[float, int, str]]] = None + ): + + super().__init__( + conditional_parameter_spec=conditional_parameter_spec, + parent_value=parent_values) + + self.values = values + self.scale = scale + + diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 535e0fb9dc..aa06fc6370 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -18,6 +18,7 @@ from typing import Iterable, Optional, Union, Sequence, Dict, List import abc +import copy import sys import time import logging @@ -33,6 +34,7 @@ from google.cloud.aiplatform import compat from google.cloud.aiplatform import constants from google.cloud.aiplatform import initializer +from google.cloud.aiplatform import hyperparameter_tuning from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import source_utils from google.cloud.aiplatform.utils import worker_spec_utils @@ -50,6 +52,7 @@ hyperparameter_tuning_job as gca_hyperparameter_tuning_job_compat, machine_resources as gca_machine_resources_compat, machine_resources_v1beta1 as gca_machine_resources_v1beta1, + study as gca_study_compat ) 
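# A minimal usage sketch of the parameter spec helpers defined in
# hyperparameter_tuning.py above (assuming the PATCH 04 state of that
# module); the parameter ids, bounds, and scales here are illustrative only:
from google.cloud.aiplatform import hyperparameter_tuning as hpt

parameter_spec = {
    "learning_rate": hpt.DoubleParameterSpec(min=1e-4, max=1e-1, scale="log"),
    "batch_size": hpt.DiscreteValueSpec(values=[16, 32, 64], scale="linear"),
}

# Each helper serializes itself into a StudySpec.ParameterSpec proto; this is
# the same conversion HyperparameterTuningJob performs over a parameter dict:
parameter_protos = [
    spec._to_parameter_spec(parameter_id=param_id)
    for param_id, spec in parameter_spec.items()
]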
logging.basicConfig(level=logging.INFO, stream=sys.stdout) @@ -181,7 +184,7 @@ def _block_until_complete(self): previous_time = current_time time.sleep(wait) - _LOGGER.log_action_completed_against_resource("", "run", self) + _LOGGER.log_action_completed_against_resource("run", "completed", self) # Error is only populated when the job state is # JOB_STATE_FAILED or JOB_STATE_CANCELLED. @@ -903,7 +906,7 @@ def run( network: Optional[str] = None, timeout: Optional[int] = None, # seconds restart_job_on_worker_restart: bool=False, - sync: bool = True): + sync: bool = True) -> None: if service_account: self._gca_resource.service_account = service_account @@ -919,16 +922,24 @@ def run( restart_job_on_worker_restart=restart_job_on_worker_restart ) + _LOGGER.log_create_with_lro(self.__class__) + self._gca_resource = self.api_client.create_custom_job( parent=self._parent, custom_job=self._gca_resource ) + _LOGGER.log_create_complete(self.__class__, self._gca_resource, "custom_job") + + _LOGGER.info( + "View Custom Job:\n%s" % self._dashboard_uri() + ) + self._block_until_complete() @property - def worker_pool_specs(self): - return self._gca_resource.job_spec.worker_pool_specs + def job_spec(self): + return self._gca_resource.job_spec @@ -943,102 +954,122 @@ class DataLabelingJob(_Job): pass -# class HyperparameterTuningJob(_Job): -# _resource_noun = "hyperparameterTuningJobs" -# _getter_method = "get_hyperparameter_tuning_job" -# _list_method = "list_hyperparameter_tuning_jobs" -# _cancel_method = "cancel_hyperparameter_tuning_job" -# _delete_method = "delete_hyperparameter_tuning_job" +_search_algorithm_to_proto_value = { + 'random': gca_study_compat.StudySpec.Algorithm.RANDOM_SEARCH, + 'grid': gca_study_compat.StudySpec.Algorithm.GRID_SEARCH + +} + +_measurement_selection_to_proto_value = { + 'best': gca_study_compat.StudySpec.MeasurementSelectionType.BEST_MEASUREMENT, + 'last': gca_study_compat.StudySpec.MeasurementSelectionType.LAST_MEASUREMENT +} + +class HyperparameterTuningJob(_Job): + _resource_noun = "hyperparameterTuningJobs" + _getter_method = "get_hyperparameter_tuning_job" + _list_method = "list_hyperparameter_tuning_jobs" + _cancel_method = "cancel_hyperparameter_tuning_job" + _delete_method = "delete_hyperparameter_tuning_job" + _job_type = "training" -# def __init__(self, -# display_name: str, -# custom_job: CustomJob, -# metric_spec: Dict[str, str], -# parameter_spec: dict[str, hyperparameter_tuning_job.Parameter], -# max_trial_count: int, -# parallel_trial_count: int, -# max_failed_trials_count: int = 0, -# observation_noise: Optional[str] = 'low', -# algorithm: Optional[str] = 'random', -# measurement_selection: Optional[str] = 'best', -# # project: Optional[str] = None, -# # location: Optional[str] = None, -# # credentials: Optional[auth_credentials.Credentials] = None, -# encryption_spec_key_name: Optional[str] = None, -# # staging_bucket: Optional[str] = None -# ): -# base.AiPlatformResourceNounWithFutureManager.__init__(self, -# project=custom_job.project, -# location=custom_job.location, -# credentials=custom_job.credentials -# ) - -# metrics = [ -# gca_study.StudySpec.MetricSpec(metric_id=metric_id, goal=goal.upper()) -# for metric_id, goal in metric_spec_dict.items() -# ] - -# parameters = [ -# parameter.to_parameter_spec(parameter_id=parameter_id) -# for parameter_id, parameter in parameter_spec.items() -# ] - -# study_spec = gca_study_compat.StudySpec( -# metrics = metrics, -# parameters = p -# algorithm - - -# ) - -# self._gca_resource = 
gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob( -# display_name=display_name, -# study_spec=, -# max_trial_count=max_trial_count, -# parallel_trial_count=parallel_trial_count, -# max_failed_trial_count=max_failed_trial_count, -# trial_job_spec=custom_job.job_spec._gca_resource.copy(), -# encryption_spec= initializer.global_config.get_encryption_spec( -# encryption_spec_key_name = encryption_spec_key_name -# ) -# ) - -# @staticmethod -# def _convert_metric_spec_dict_to_metric_spec( -# metric_spec_dict: Dict[str, str]) -> gca_study_compat.StudySpec.MetricSpec: -# return - - -# @base.optional_sync() -# def run( -# self, -# service_account: Optional[str] = None, -# network: Optional[str] = None, -# timeout: Optional[int] = None, # seconds -# restart_job_on_worker_restart: bool=False, -# sync: bool = True): - -# if service_account: -# self._gca_resource.trial_job_spec.service_account = service_account - -# if network: -# self._gca_resource.trial_job_spec.network = network - - -# if timeout or restart_job_on_worker_restart: -# timout = duration_pb2.Duration(seconds=timout) if timeout else None -# self._gca_resource.trial_job_spec.scheduling = gca_custom_job_compat.Scheduling( -# timeout=timeout, -# restart_job_on_worker_restart=restart_job_on_worker_restart -# ) - -# self._gca_resource = self.api_client.create_hyperparameter_tuning_job( -# parent=self._parent, -# hyperparameter_tuning_job=self._gca_resource -# ) - -# self._block_until_complete() + def __init__(self, + display_name: str, + custom_job: CustomJob, + metric_spec: Dict[str, str], + parameter_spec: Dict[str, hyperparameter_tuning._ParameterSpec], + max_trial_count: int, + parallel_trial_count: int, + max_failed_trial_count: int = 0, + search_algorithm: Optional[str] = 'random', + # observation_noise: Optional[str] = 'low', + measurement_selection: Optional[str] = 'best', + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + encryption_spec_key_name: Optional[str] = None, + # staging_bucket: Optional[str] = None + ): + base.AiPlatformResourceNounWithFutureManager.__init__(self, + project=project, + location=location, + credentials=credentials, + ) + + self._parent = aiplatform.initializer.global_config.common_location_path( + project=project, + location=location + ) + + metrics = [ + gca_study_compat.StudySpec.MetricSpec(metric_id=metric_id, goal=goal.upper()) + for metric_id, goal in metric_spec.items() + ] + + parameters = [ + parameter._to_parameter_spec(parameter_id=parameter_id) + for parameter_id, parameter in parameter_spec.items() + ] + + study_spec = gca_study_compat.StudySpec( + metrics = metrics, + parameters = parameters, + algorithm = _search_algorithm_to_proto_value[search_algorithm], + # observation_noise = observation_noise.upper(), + measurement_selection_type = _measurement_selection_to_proto_value[measurement_selection] + ) + + self._gca_resource = gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob( + display_name=display_name, + study_spec=study_spec, + max_trial_count=max_trial_count, + parallel_trial_count=parallel_trial_count, + max_failed_trial_count=max_failed_trial_count, + trial_job_spec=copy.deepcopy(custom_job.job_spec), + encryption_spec= initializer.global_config.get_encryption_spec( + encryption_spec_key_name = encryption_spec_key_name + ) + ) + + @base.optional_sync() + def run( + self, + service_account: Optional[str] = None, + network: Optional[str] = None, + timeout: Optional[int] = None, # seconds + 
restart_job_on_worker_restart: bool=False,
+        sync: bool = True) -> None:
+
+        if service_account:
+            self._gca_resource.trial_job_spec.service_account = service_account
+
+        if network:
+            self._gca_resource.trial_job_spec.network = network
+
+
+        if timeout or restart_job_on_worker_restart:
+            timeout = duration_pb2.Duration(seconds=timeout) if timeout else None
+            self._gca_resource.trial_job_spec.scheduling = gca_custom_job_compat.Scheduling(
+                timeout=timeout,
+                restart_job_on_worker_restart=restart_job_on_worker_restart
+            )
+
+        _LOGGER.log_create_with_lro(self.__class__)
+
+        self._gca_resource = self.api_client.create_hyperparameter_tuning_job(
+            parent=self._parent,
+            hyperparameter_tuning_job=self._gca_resource
+        )
+
+        _LOGGER.log_create_complete(self.__class__, self._gca_resource, "hpt_job")
+
+        _LOGGER.info(
+            "View HyperparameterTuningJob:\n%s" % self._dashboard_uri()
+        )
+
+
+        self._block_until_complete()

From b5cc6e59c6998b1bc6cf507949a8b1871eb7f735 Mon Sep 17 00:00:00 2001
From: Sasha Sobran
Date: Fri, 14 May 2021 11:55:21 -0400
Subject: [PATCH 05/29] chore: update test imports

---
 tests/unit/aiplatform/test_training_jobs.py | 88 ++++++++++-----------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py
index 8f783edc15..04c5d609d5 100644
--- a/tests/unit/aiplatform/test_training_jobs.py
+++ b/tests/unit/aiplatform/test_training_jobs.py
@@ -30,9 +30,9 @@
 
 from google.auth import credentials as auth_credentials
 
-import google.cloud.aiplatform.utils
-import google.cloud.aiplatform.utils.source_utils
-import google.cloud.aiplatform.utils.worker_spec_utils
+from google.cloud.aiplatform import utils
+from utils import source_utils
+from utils import worker_spec_utils
 
 from google.cloud import aiplatform
 from google.cloud.aiplatform import datasets
@@ -237,7 +237,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client_with_bucket(
 
         mock_client_bucket, mock_blob = mock_client_bucket
 
-        gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs(
+        gcs_path = utils._timestamped_copy_to_gcs(
             local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH,
             gcs_dir=_TEST_BUCKET_NAME,
             project=_TEST_PROJECT,
@@ -264,7 +264,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client_with_gcs_path(
 
         mock_client_bucket, mock_blob = mock_client_bucket
 
-        gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs(
+        gcs_path = utils._timestamped_copy_to_gcs(
             local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH,
             gcs_dir=_TEST_GCS_PATH_WITH_TRAILING_SLASH,
             project=_TEST_PROJECT,
@@ -292,7 +292,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client_with_trailing_slash(
 
         mock_client_bucket, mock_blob = mock_client_bucket
 
-        gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs(
+        gcs_path = utils._timestamped_copy_to_gcs(
             local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH,
             gcs_dir=_TEST_GCS_PATH,
             project=_TEST_PROJECT,
@@ -318,7 +318,7 @@ def test_timestamp_copy_to_gcs_calls_gcs_client(self, mock_client_bucket):
 
         mock_client_bucket, mock_blob = mock_client_bucket
 
-        gcs_path = google.cloud.aiplatform.utils._timestamped_copy_to_gcs(
+        gcs_path = utils._timestamped_copy_to_gcs(
            local_file_path=_TEST_LOCAL_SCRIPT_FILE_PATH,
            gcs_dir=_TEST_BUCKET_NAME,
            project=_TEST_PROJECT,
@@ -335,10 +335,10 @@ def test_timestamp_copy_to_gcs_calls_gcs_client(self, mock_client_bucket):
 
     def test_get_python_executable_raises_if_None(self):
         with patch.object(sys, "executable", new=None):
             with pytest.raises(EnvironmentError):
-
google.cloud.aiplatform.utils.source_utils._get_python_executable() + source_utils._get_python_executable() def test_get_python_executable_returns_python_executable(self): - assert "python" in google.cloud.aiplatform.utils.source_utils._get_python_executable().lower() + assert "python" in source_utils._get_python_executable().lower() class TestTrainingScriptPythonPackager: @@ -350,7 +350,7 @@ def setup_method(self): def teardown_method(self): pathlib.Path(_TEST_LOCAL_SCRIPT_FILE_NAME).unlink() - python_package_file = f"{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._ROOT_MODULE}-{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._SETUP_PY_VERSION}.tar.gz" + python_package_file = f"{source_utils._TrainingScriptPythonPackager._ROOT_MODULE}-{source_utils._TrainingScriptPythonPackager._SETUP_PY_VERSION}.tar.gz" if pathlib.Path(python_package_file).is_file(): pathlib.Path(python_package_file).unlink() subprocess.check_output( @@ -358,34 +358,34 @@ def teardown_method(self): "pip3", "uninstall", "-y", - google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._ROOT_MODULE, + source_utils._TrainingScriptPythonPackager._ROOT_MODULE, ] ) def test_packager_creates_and_copies_python_package(self): - tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) + tsp = source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) tsp.package_and_copy(copy_method=local_copy_method) assert pathlib.Path( f"{tsp._ROOT_MODULE}-{tsp._SETUP_PY_VERSION}.tar.gz" ).is_file() def test_created_package_module_is_installable_and_can_be_run(self): - tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) + tsp = source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) source_dist_path = tsp.package_and_copy(copy_method=local_copy_method) subprocess.check_output(["pip3", "install", source_dist_path]) module_output = subprocess.check_output( - [google.cloud.aiplatform.utils.source_utils._get_python_executable(), "-m", tsp.module_name] + [source_utils._get_python_executable(), "-m", tsp.module_name] ) assert "hello world" in module_output.decode() def test_requirements_are_in_package(self): - tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager( + tsp = source_utils._TrainingScriptPythonPackager( _TEST_LOCAL_SCRIPT_FILE_NAME, requirements=_TEST_REQUIREMENTS ) source_dist_path = tsp.package_and_copy(copy_method=local_copy_method) with tarfile.open(source_dist_path) as tf: with tempfile.TemporaryDirectory() as tmpdirname: - setup_py_path = f"{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._ROOT_MODULE}-{google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager._SETUP_PY_VERSION}/setup.py" + setup_py_path = f"{source_utils._TrainingScriptPythonPackager._ROOT_MODULE}-{source_utils._TrainingScriptPythonPackager._SETUP_PY_VERSION}/setup.py" tf.extract(setup_py_path, path=tmpdirname) setup_py = core.run_setup( pathlib.Path(tmpdirname, setup_py_path), stop_after="init" @@ -398,7 +398,7 @@ def test_packaging_fails_whith_RuntimeError(self): mock_subprocess.communicate.return_value = (b"", b"") mock_subprocess.returncode = 1 mock_popen.return_value = mock_subprocess - tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager( + tsp = source_utils._TrainingScriptPythonPackager( _TEST_LOCAL_SCRIPT_FILE_NAME ) with pytest.raises(RuntimeError): @@ -407,7 
+407,7 @@ def test_packaging_fails_whith_RuntimeError(self): def test_package_and_copy_to_gcs_copies_to_gcs(self, mock_client_bucket): mock_client_bucket, mock_blob = mock_client_bucket - tsp = google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) + tsp = source_utils._TrainingScriptPythonPackager(_TEST_LOCAL_SCRIPT_FILE_NAME) gcs_path = tsp.package_and_copy_to_gcs( gcs_staging_dir=_TEST_BUCKET_NAME, project=_TEST_PROJECT @@ -515,7 +515,7 @@ def mock_model_service_get(): @pytest.fixture def mock_python_package_to_gcs(): with mock.patch.object( - google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager, "package_and_copy_to_gcs" + source_utils._TrainingScriptPythonPackager, "package_and_copy_to_gcs" ) as mock_package_to_copy_gcs: mock_package_to_copy_gcs.return_value = _TEST_OUTPUT_PYTHON_PACKAGE_PATH yield mock_package_to_copy_gcs @@ -641,7 +641,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, + "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -800,7 +800,7 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, + "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1075,7 +1075,7 @@ def test_run_call_pipeline_service_create_with_no_dataset( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, + "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1327,7 +1327,7 @@ def test_run_call_pipeline_service_create_distributed_training( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, + "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1342,7 +1342,7 @@ def test_run_call_pipeline_service_create_distributed_training( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, + "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, @@ -1555,7 +1555,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( }, "pythonPackageSpec": { "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, + "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, @@ -2692,7 +2692,7 @@ def 
test_run_call_pipeline_service_create_with_nontabular_dataset_raises_if_anno class Test_MachineSpec: def test_machine_spec_return_spec_dict(self): - test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2711,7 +2711,7 @@ def test_machine_spec_return_spec_dict(self): assert test_spec.spec_dict == true_spec_dict def test_machine_spec_return_spec_dict_with_no_accelerator(self): - test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=0, @@ -2726,7 +2726,7 @@ def test_machine_spec_return_spec_dict_with_no_accelerator(self): assert test_spec.spec_dict == true_spec_dict def test_machine_spec_spec_dict_raises_invalid_accelerator(self): - test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2737,7 +2737,7 @@ def test_machine_spec_spec_dict_raises_invalid_accelerator(self): test_spec.spec_dict def test_machine_spec_spec_dict_is_empty(self): - test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._MachineSpec( replica_count=0, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2747,7 +2747,7 @@ def test_machine_spec_spec_dict_is_empty(self): assert test_spec.is_empty def test_machine_spec_spec_dict_is_not_empty(self): - test_spec = google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._MachineSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2760,26 +2760,26 @@ def test_machine_spec_spec_dict_is_not_empty(self): class Test_DistributedTrainingSpec: def test_machine_spec_returns_pool_spec(self): - spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec( - chief_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + spec = worker_spec_utils._DistributedTrainingSpec( + chief_spec=worker_spec_utils._MachineSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - worker_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + worker_spec=worker_spec_utils._MachineSpec( replica_count=10, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - parameter_server_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + parameter_server_spec=worker_spec_utils._MachineSpec( replica_count=3, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - evaluator_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + evaluator_spec=worker_spec_utils._MachineSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2826,7 +2826,7 @@ def test_machine_spec_returns_pool_spec(self): def test_chief_worker_pool_returns_spec(self): - chief_worker_spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( + chief_worker_spec = 
worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=10, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2856,7 +2856,7 @@ def test_chief_worker_pool_returns_spec(self): def test_chief_worker_pool_returns_just_chief(self): - chief_worker_spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( + chief_worker_spec = worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2878,8 +2878,8 @@ def test_chief_worker_pool_returns_just_chief(self): def test_machine_spec_raise_with_more_than_one_chief_replica(self): - spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec( - chief_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + spec = worker_spec_utils._DistributedTrainingSpec( + chief_spec=worker_spec_utils._MachineSpec( replica_count=2, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2892,21 +2892,21 @@ def test_machine_spec_raise_with_more_than_one_chief_replica(self): def test_machine_spec_handles_missing_pools(self): - spec = google.cloud.aiplatform.utils.worker_spec_utils._DistributedTrainingSpec( - chief_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + spec = worker_spec_utils._DistributedTrainingSpec( + chief_spec=worker_spec_utils._MachineSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - worker_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec(replica_count=0), - parameter_server_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec( + worker_spec=worker_spec_utils._MachineSpec(replica_count=0), + parameter_server_spec=worker_spec_utils._MachineSpec( replica_count=3, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - evaluator_spec=google.cloud.aiplatform.utils.worker_spec_utils._MachineSpec(replica_count=0), + evaluator_spec=worker_spec_utils._MachineSpec(replica_count=0), ) true_pool_spec = [ From 4a1b0cae263bbda951b4b04d944e3dba9922e223 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 12:05:52 -0400 Subject: [PATCH 06/29] fix: remove added __init__ files --- google/__init__.py | 0 google/cloud/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 google/__init__.py delete mode 100644 google/cloud/__init__.py diff --git a/google/__init__.py b/google/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/google/cloud/__init__.py b/google/cloud/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 From 10f4f80dc29b03db318bbd217c604e0187d3e45e Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 12:15:36 -0400 Subject: [PATCH 07/29] chore: update test imports --- tests/unit/aiplatform/test_training_jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index 04c5d609d5..b4745d29f1 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -31,8 +31,8 @@ from google.auth import credentials as auth_credentials from google.cloud.aiplatform import utils -from utils import source_utils -from utils import worker_spec_utils +from google.cloud.aiplatform.utils 
import source_utils +from google.cloud.aiplatform.utils import worker_spec_utils from google.cloud import aiplatform from google.cloud.aiplatform import datasets From d61079c49189cb10f0317a0e2c9d3f4a723301fc Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 13:07:10 -0400 Subject: [PATCH 08/29] feat: add hp tuning metric reporter to training utils --- google/cloud/aiplatform/__init__.py | 2 + google/cloud/aiplatform/training_utils.py | 88 ++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 36edec3960..99f7df09d3 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -41,6 +41,7 @@ AutoMLTextTrainingJob, AutoMLVideoTrainingJob, ) +from google.cloud.aiplatform import training_utils """ Usage: @@ -67,6 +68,7 @@ "get_experiment_df", "get_pipeline_df", "start_run", + "training_utils", "AutoMLImageTrainingJob", "AutoMLTabularTrainingJob", "AutoMLForecastingTrainingJob", diff --git a/google/cloud/aiplatform/training_utils.py b/google/cloud/aiplatform/training_utils.py index fea60c5005..71645a8446 100644 --- a/google/cloud/aiplatform/training_utils.py +++ b/google/cloud/aiplatform/training_utils.py @@ -15,9 +15,10 @@ # limitations under the License. # +import collections import json import os - +import time from typing import Dict, Optional @@ -103,3 +104,88 @@ def tf_config(self) -> Optional[Dict]: return json.loads(tf_config_env) else: return None + + +_DEFAULT_HYPERPARAMETER_METRIC_TAG = 'training/hptuning/metric' +_DEFAULT_METRIC_PATH = '/tmp/hypertune/output.metrics' +# TODO(0olwzo0): consider to make it configurable +_MAX_NUM_METRIC_ENTRIES_TO_PRESERVE = 100 + + +class _HyperparameterTuningJobReporterSingleton: + """Main class for HyperTune.""" + + initialized = False + + @classmethod + def initialize(cls): + if cls.initialized: + return + + cls.metric_path = os.environ.get('CLOUD_ML_HP_METRIC_FILE', + _DEFAULT_METRIC_PATH) + if not os.path.exists(os.path.dirname(cls.metric_path)): + os.makedirs(os.path.dirname(cls.metric_path)) + + cls.trial_id = os.environ.get('CLOUD_ML_TRIAL_ID', 0) + cls.metrics_queue = collections.deque( + maxlen=_MAX_NUM_METRIC_ENTRIES_TO_PRESERVE) + + cls.initialized = True + + @classmethod + def _dump_metrics_to_file(cls): + with open(cls.metric_path, 'w') as metric_file: + for metric in cls.metrics_queue: + metric_file.write(json.dumps(metric, sort_keys=True) + '\n') + + @classmethod + def report_hyperparameter_tuning_metric(cls, + hyperparameter_metric_tag, + metric_value, + global_step=None, + checkpoint_path=''): + """Method to report hyperparameter tuning metric. + Args: + hyperparameter_metric_tag: The hyperparameter metric name this metric + value is associated with. Should keep consistent with the tag + specified in HyperparameterSpec. + metric_value: float, the values for the hyperparameter metric to report. + global_step: int, the global step this metric value is associated with. + checkpoint_path: The checkpoint path which can be used to warmstart from. 
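+
+        Example:
+            A minimal sketch; the metric tag and values are illustrative:
+
+                _HyperparameterTuningJobReporterSingleton.initialize()
+                _HyperparameterTuningJobReporterSingleton.report_hyperparameter_tuning_metric(
+                    hyperparameter_metric_tag='accuracy',
+                    metric_value=0.95,
+                    global_step=1000)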
+ """ + metric_value = float(metric_value) + metric_tag = _DEFAULT_HYPERPARAMETER_METRIC_TAG + if hyperparameter_metric_tag: + metric_tag = hyperparameter_metric_tag + metric_body = { + 'timestamp': time.time(), + 'trial': str(cls.trial_id), + metric_tag: str(metric_value), + 'global_step': str(int(global_step) if global_step else 0), + 'checkpoint_path': checkpoint_path + } + cls.metrics_queue.append(metric_body) + cls._dump_metrics_to_file() + + +def report_hyperparameter_tuning_metric( + metrics: Dict[str, float], + global_step: Optional[int] = None, + checkpoint_path='' + ): + _HyperparameterTuningJobReporterSingleton.initialize() + + for hyperparameter_metric_tag, metric_value in metrics.items(): + _HyperparameterTuningJobReporterSingleton.report_hyperparameter_tuning_metric( + hyperparameter_metric_tag=hyperparameter_metric_tag, + metric_value=metric_value, + global_step=global_step, + checkpoint_path=checkpoint_path + ) + + + + + + From 455944dc80812ed31669262a9e1e38b71dc64da1 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 13:11:08 -0400 Subject: [PATCH 09/29] chore: make plural --- google/cloud/aiplatform/training_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/training_utils.py b/google/cloud/aiplatform/training_utils.py index 71645a8446..48af524e3f 100644 --- a/google/cloud/aiplatform/training_utils.py +++ b/google/cloud/aiplatform/training_utils.py @@ -169,7 +169,7 @@ def report_hyperparameter_tuning_metric(cls, cls._dump_metrics_to_file() -def report_hyperparameter_tuning_metric( +def report_hyperparameter_tuning_metrics( metrics: Dict[str, float], global_step: Optional[int] = None, checkpoint_path='' From a8e0da693a099b49815037442d54cbdced53ee5b Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 16:07:48 -0400 Subject: [PATCH 10/29] feat: added trials property, refactored job classes, updating training pipelines to use snake case instead of camel case to be consistent with custom job --- google/cloud/aiplatform/__init__.py | 9 +- google/cloud/aiplatform/base.py | 5 + .../cloud/aiplatform/hyperparameter_tuning.py | 121 +++-- google/cloud/aiplatform/jobs.py | 248 +++++---- google/cloud/aiplatform/training_jobs.py | 60 +-- google/cloud/aiplatform/training_utils.py | 68 ++- google/cloud/aiplatform/utils/__init__.py | 2 +- google/cloud/aiplatform/utils/source_utils.py | 2 +- .../aiplatform/utils/worker_spec_utils.py | 6 +- tests/unit/aiplatform/test_end_to_end.py | 46 +- tests/unit/aiplatform/test_training_jobs.py | 504 ++++++++++-------- 11 files changed, 585 insertions(+), 486 deletions(-) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index 99f7df09d3..32ec7538e0 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -30,7 +30,11 @@ from google.cloud.aiplatform.metadata import metadata from google.cloud.aiplatform.models import Endpoint from google.cloud.aiplatform.models import Model -from google.cloud.aiplatform.jobs import BatchPredictionJob, CustomJob, HyperparameterTuningJob +from google.cloud.aiplatform.jobs import ( + BatchPredictionJob, + CustomJob, + HyperparameterTuningJob, +) from google.cloud.aiplatform.training_jobs import ( CustomTrainingJob, CustomContainerTrainingJob, @@ -62,8 +66,7 @@ "explain", "gapic", "init", - "hyperparameter_tuning" - "log_params", + "hyperparameter_tuning" "log_params", "log_metrics", "get_experiment_df", "get_pipeline_df", diff --git a/google/cloud/aiplatform/base.py 
index f46db9c47e..b42b630f75 100644
--- a/google/cloud/aiplatform/base.py
+++ b/google/cloud/aiplatform/base.py
@@ -543,6 +543,11 @@ def update_time(self) -> datetime.datetime:
         self._sync_gca_resource()
         return self._gca_resource.update_time
 
+    @property
+    def gca_resource(self) -> proto.Message:
+        """The underlying resource proto representation."""
+        return self._gca_resource
+
     def __repr__(self) -> str:
         return f"{object.__repr__(self)} \nresource name: {self.resource_name}"
 
diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py
index 045e3f955e..5a2ef35fda 100644
--- a/google/cloud/aiplatform/hyperparameter_tuning.py
+++ b/google/cloud/aiplatform/hyperparameter_tuning.py
@@ -6,18 +6,19 @@
 from google.cloud.aiplatform.compat.types import study as gca_study_compat
 
 _scale_type_map = {
-    'linear': gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
-    'log': gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LOG_SCALE,
-    'reverse_log': gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_REVERSE_LOG_SCALE,
+    "linear": gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
+    "log": gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LOG_SCALE,
+    "reverse_log": gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_REVERSE_LOG_SCALE,
+    "unspecified": gca_study_compat.StudySpec.ParameterSpec.ScaleType.SCALE_TYPE_UNSPECIFIED,
 }
 
 
 class _ParameterSpec(metaclass=abc.ABCMeta):
-
     def __init__(
         self,
-        conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None):
+        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        parent_values: Optional[List[Union[float, int, str]]] = None,
+    ):
 
         self.conditional_parameter_spec = conditional_parameter_spec
         self.parent_values = parent_values
@@ -39,114 +40,131 @@ def _parameter_value_map(self) -> Tuple[Tuple[str, str]]:
     @abc.abstractmethod
     def _parameter_spec_value_key(self) -> Tuple[Tuple[str, str]]:
         pass
-
-    @property
+    @property
     def _proto_parameter_value_spec(self) -> proto.Message:
         proto_parameter_value_spec = self._proto_parameter_value_class()
         for self_attr_key, proto_attr_key in self._parameter_value_map:
-            setattr(proto_parameter_value_spec, proto_attr_key, getattr(self, self_attr_key))
+            setattr(
+                proto_parameter_value_spec, proto_attr_key, getattr(self, self_attr_key)
+            )
         return proto_parameter_value_spec
-
-    def _to_parameter_spec(self, parameter_id: str) -> gca_study_compat.StudySpec.ParameterSpec:
+    def _to_parameter_spec(
+        self, parameter_id: str
+    ) -> gca_study_compat.StudySpec.ParameterSpec:
         # TODO: Conditional parameters
         parameter_spec = gca_study_compat.StudySpec.ParameterSpec(
-            parameter_id=parameter_id,
-            scale_type=_scale_type_map.get(getattr(self, 'scale'))
-        )
+            parameter_id=parameter_id,
+            scale_type=_scale_type_map.get(getattr(self, "scale", "unspecified")),
+        )
 
-        setattr(parameter_spec, self._parameter_spec_value_key, self._proto_parameter_value_spec)
+        setattr(
+            parameter_spec,
+            self._parameter_spec_value_key,
+            self._proto_parameter_value_spec,
+        )
 
         return parameter_spec
 
 
 class DoubleParameterSpec(_ParameterSpec):
 
-    _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.DoubleValueSpec
-    _parameter_value_map = (('min', 'min_value'), ('max', 'max_value'))
-    _parameter_spec_value_key = 'double_value_spec'
-
+    _proto_parameter_value_class = (
+        gca_study_compat.StudySpec.ParameterSpec.DoubleValueSpec
+    )
+    _parameter_value_map = (("min", "min_value"), ("max", "max_value"))
+    _parameter_spec_value_key = "double_value_spec"
+
     def __init__(
         self,
         min: float,
         max: float,
         scale: str,
-        conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None
-    ):
+        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        parent_values: Optional[List[Union[float, int, str]]] = None,
+    ):
 
         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
-            parent_values=parent_values)
+            parent_values=parent_values,
+        )
 
         self.min = min
         self.max = max
-        self.scale=scale
+        self.scale = scale
 
 
 class IntegerParameterSpec(_ParameterSpec):
-
-    _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.IntegerValueSpec
-    _parameter_value_map = (('min', 'min_value'), ('max', 'max_value'))
-    _parameter_spec_value_key = 'integer_value_spec'
+
+    _proto_parameter_value_class = (
+        gca_study_compat.StudySpec.ParameterSpec.IntegerValueSpec
+    )
+    _parameter_value_map = (("min", "min_value"), ("max", "max_value"))
+    _parameter_spec_value_key = "integer_value_spec"
 
     def __init__(
         self,
         min: int,
         max: int,
         scale: str,
-        conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None
-    ):
+        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        parent_values: Optional[List[Union[float, int, str]]] = None,
+    ):
 
         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
-            parent_value=parent_values)
+            parent_values=parent_values,
+        )
 
         self.min = min
-        self.max = max,
-        self.scale=scale
+        self.max = max
+        self.scale = scale
+
 
 class CategoricalValueSpec(_ParameterSpec):
 
-    _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.CategoricalValueSpec
-    _parameter_value_map = (('values', 'values'))
-    _parameter_spec_value_key = 'categorical_value_spec'
-
+    _proto_parameter_value_class = (
+        gca_study_compat.StudySpec.ParameterSpec.CategoricalValueSpec
+    )
+    _parameter_value_map = (("values", "values"),)
+    _parameter_spec_value_key = "categorical_value_spec"
+
    def __init__(
         self,
         values: List[str],
-        conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None
-    ):
+        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        parent_values: Optional[List[Union[float, int, str]]] = None,
+    ):
 
         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
-            parent_value=parent_values)
+            parent_values=parent_values,
+        )
 
         self.values = values
 
 
 class DiscreteValueSpec(_ParameterSpec):
 
-    _proto_parameter_value_class = gca_study_compat.StudySpec.ParameterSpec.DiscreteValueSpec
-    _parameter_value_map = (('values', 'values'))
-    _parameter_spec_value_key = 'discrete_value_spec'
-
+    _proto_parameter_value_class = (
+        gca_study_compat.StudySpec.ParameterSpec.DiscreteValueSpec
+    )
+    _parameter_value_map = (("values", "values"),)
+    _parameter_spec_value_key = "discrete_value_spec"
+
     def __init__(
         self,
         values: List[float],
         scale: str,
-        conditional_parameter_spec: Optional[Dict[str, '_Parameter']] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None
-    ):
+        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        parent_values: Optional[List[Union[float, int, str]]] = None,
+    ):
 
         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
-            parent_value=parent_values)
+            parent_values=parent_values,
+        )
 
         self.values = values
         self.scale = scale
-
-
diff --git a/google/cloud/aiplatform/jobs.py
b/google/cloud/aiplatform/jobs.py index aa06fc6370..25f065f502 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -36,8 +36,8 @@ from google.cloud.aiplatform import initializer from google.cloud.aiplatform import hyperparameter_tuning from google.cloud.aiplatform import utils -from google.cloud.aiplatform.utils import source_utils -from google.cloud.aiplatform.utils import worker_spec_utils +from google.cloud.aiplatform.utils import source_utils +from google.cloud.aiplatform.utils import worker_spec_utils from google.cloud.aiplatform.compat.services import job_service_client from google.cloud.aiplatform.compat.types import ( @@ -52,7 +52,7 @@ hyperparameter_tuning_job as gca_hyperparameter_tuning_job_compat, machine_resources as gca_machine_resources_compat, machine_resources_v1beta1 as gca_machine_resources_v1beta1, - study as gca_study_compat + study as gca_study_compat, ) logging.basicConfig(level=logging.INFO, stream=sys.stdout) @@ -776,7 +776,76 @@ def iter_outputs( ) -class CustomJob(_Job): +class _RunnableJob(_Job): + def __init__( + self, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ): + + base.AiPlatformResourceNounWithFutureManager.__init__( + self, project=project, location=location, credentials=credentials + ) + + self._parent = aiplatform.initializer.global_config.common_location_path( + project=project, location=location + ) + + @abc.abstractmethod + def run(self) -> None: + pass + + @property + def _has_run(self) -> bool: + return bool(getattr(self._gca_resource, "name")) + + @property + def state(self) -> gca_job_state.JobState: + if not self._has_run: + raise RuntimeError("Job has not run. No state available.") + + return super().state + + @classmethod + def get( + cls, + resource_name: str, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> "_RunnableJob": + """Get an AI Platform Job for the given resource_name. + + Args: + resource_name (str): + Required. A fully-qualified resource name or ID. + project (str): + Optional project to retrieve dataset from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional location to retrieve dataset from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Custom credentials to use to upload this model. Overrides + credentials set in aiplatform.init. + + Returns: + An AI Platform Job. 
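+
+        Example:
+            A sketch of retrieving an existing job; the resource name below
+            is illustrative:
+
+                job = CustomJob.get(
+                    "projects/my-project/locations/us-central1/customJobs/12345"
+                )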
+ """ + self = cls._empty_constructor( + project=project, + location=location, + credentials=credentials, + resource_name=resource_name, + ) + + self._gca_resource = self._get_gca_resource(resource_name=resource_name) + + return self + + +class CustomJob(_RunnableJob): _resource_noun = "customJobs" _getter_method = "get_custom_job" _list_method = "list_custom_job" @@ -785,25 +854,18 @@ class CustomJob(_Job): _job_type = "training" pass - def __init__(self, + def __init__( + self, display_name: str, worker_pool_specs: Union[Dict], project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, - staging_bucket: Optional[str] = None): - - base.AiPlatformResourceNounWithFutureManager.__init__(self, - project=project, - location=location, - credentials=credentials - ) + staging_bucket: Optional[str] = None, + ): - self._parent = aiplatform.initializer.global_config.common_location_path( - project=project, - location=location - ) + super().__init__(project=project, location=location, credentials=credentials) staging_bucket = staging_bucket or initializer.global_config.staging_bucket @@ -813,18 +875,19 @@ def __init__(self, "should be set using aiplatform.init(staging_bucket='gs://my-bucket')" ) - self._gca_resource = gca_custom_job_compat.CustomJob( + self._gca_resource = gca_custom_job_compat.CustomJob( display_name=display_name, - job_spec = gca_custom_job_compat.CustomJobSpec( + job_spec=gca_custom_job_compat.CustomJobSpec( worker_pool_specs=worker_pool_specs, - base_output_directory=gca_io_compat.GcsDestination(output_uri_prefix=staging_bucket), + base_output_directory=gca_io_compat.GcsDestination( + output_uri_prefix=staging_bucket ), - encryption_spec= initializer.global_config.get_encryption_spec( + ), + encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name - ) + ), ) - @classmethod def from_local_script( cls, @@ -840,10 +903,10 @@ def from_local_script( accelerator_count: int = 0, project: Optional[str] = None, location: Optional[str] = None, - staging_bucket: Optional[str]= None, + staging_bucket: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, - ) -> 'CustomJob': + ) -> "CustomJob": project = project or initializer.global_config.project location = location or initializer.global_config.location @@ -862,15 +925,12 @@ def from_local_script( accelerator_type=accelerator_type, ).pool_specs - python_packager = source_utils._TrainingScriptPythonPackager( - script_path=script_path, requirements=requirements - ) + script_path=script_path, requirements=requirements + ) package_gcs_uri = python_packager.package_and_copy_to_gcs( - gcs_staging_dir = staging_bucket, - project = project, - credentials = credentials, + gcs_staging_dir=staging_bucket, project=project, credentials=credentials, ) for spec in worker_pool_specs: @@ -896,17 +956,18 @@ def from_local_script( location=location, credentials=credentials, encryption_spec_key_name=encryption_spec_key_name, - staging_bucket=staging_bucket) - + staging_bucket=staging_bucket, + ) @base.optional_sync() def run( self, service_account: Optional[str] = None, network: Optional[str] = None, - timeout: Optional[int] = None, # seconds - restart_job_on_worker_restart: bool=False, - sync: bool = True) -> None: + timeout: Optional[int] = None, # seconds + restart_job_on_worker_restart: bool = False, + sync: bool = 
True, + ) -> None: if service_account: self._gca_resource.service_account = service_account @@ -914,34 +975,28 @@ def run( if network: self._gca_resource.network = network - if timeout or restart_job_on_worker_restart: timout = duration_pb2.Duration(seconds=timout) if timeout else None self._gca_resource.job_spec.scheduling = gca_custom_job_compat.Scheduling( - timeout=timeout, - restart_job_on_worker_restart=restart_job_on_worker_restart - ) + timeout=timeout, + restart_job_on_worker_restart=restart_job_on_worker_restart, + ) _LOGGER.log_create_with_lro(self.__class__) self._gca_resource = self.api_client.create_custom_job( - parent=self._parent, custom_job=self._gca_resource - ) + parent=self._parent, custom_job=self._gca_resource + ) _LOGGER.log_create_complete(self.__class__, self._gca_resource, "custom_job") - _LOGGER.info( - "View Custom Job:\n%s" % self._dashboard_uri() - ) + _LOGGER.info("View Custom Job:\n%s" % self._dashboard_uri()) self._block_until_complete() - @property def job_spec(self): return self._gca_resource.job_spec - - class DataLabelingJob(_Job): @@ -954,27 +1009,27 @@ class DataLabelingJob(_Job): pass -_search_algorithm_to_proto_value = { - 'random': gca_study_compat.StudySpec.Algorithm.RANDOM_SEARCH, - 'grid': gca_study_compat.StudySpec.Algorithm.GRID_SEARCH - +_SEARCH_ALGORITHM_TO_PROTO_VALUE = { + "random": gca_study_compat.StudySpec.Algorithm.RANDOM_SEARCH, + "grid": gca_study_compat.StudySpec.Algorithm.GRID_SEARCH, } -_measurement_selection_to_proto_value = { - 'best': gca_study_compat.StudySpec.MeasurementSelectionType.BEST_MEASUREMENT, - 'last': gca_study_compat.StudySpec.MeasurementSelectionType.LAST_MEASUREMENT +_MEASUREMENT_SELECTION_TO_PROTO_VALUE = { + "best": gca_study_compat.StudySpec.MeasurementSelectionType.BEST_MEASUREMENT, + "last": gca_study_compat.StudySpec.MeasurementSelectionType.LAST_MEASUREMENT, } -class HyperparameterTuningJob(_Job): + +class HyperparameterTuningJob(_RunnableJob): _resource_noun = "hyperparameterTuningJobs" _getter_method = "get_hyperparameter_tuning_job" _list_method = "list_hyperparameter_tuning_jobs" _cancel_method = "cancel_hyperparameter_tuning_job" _delete_method = "delete_hyperparameter_tuning_job" _job_type = "training" - - def __init__(self, + def __init__( + self, display_name: str, custom_job: CustomJob, metric_spec: Dict[str, str], @@ -982,28 +1037,19 @@ def __init__(self, max_trial_count: int, parallel_trial_count: int, max_failed_trial_count: int = 0, - search_algorithm: Optional[str] = 'random', - # observation_noise: Optional[str] = 'low', - measurement_selection: Optional[str] = 'best', + search_algorithm: Optional[str] = "random", + measurement_selection: Optional[str] = "best", project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, - # staging_bucket: Optional[str] = None ): - base.AiPlatformResourceNounWithFutureManager.__init__(self, - project=project, - location=location, - credentials=credentials, - ) - - self._parent = aiplatform.initializer.global_config.common_location_path( - project=project, - location=location - ) + super().__init__(project=project, location=location, credentials=credentials) metrics = [ - gca_study_compat.StudySpec.MetricSpec(metric_id=metric_id, goal=goal.upper()) + gca_study_compat.StudySpec.MetricSpec( + metric_id=metric_id, goal=goal.upper() + ) for metric_id, goal in metric_spec.items() ] @@ -1013,23 +1059,24 @@ def __init__(self, ] study_spec = 
gca_study_compat.StudySpec( - metrics = metrics, - parameters = parameters, - algorithm = _search_algorithm_to_proto_value[search_algorithm], - # observation_noise = observation_noise.upper(), - measurement_selection_type = _measurement_selection_to_proto_value[measurement_selection] + metrics=metrics, + parameters=parameters, + algorithm=_SEARCH_ALGORITHM_TO_PROTO_VALUE[search_algorithm], + measurement_selection_type=_MEASUREMENT_SELECTION_TO_PROTO_VALUE[ + measurement_selection + ], ) self._gca_resource = gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob( - display_name=display_name, - study_spec=study_spec, - max_trial_count=max_trial_count, - parallel_trial_count=parallel_trial_count, - max_failed_trial_count=max_failed_trial_count, - trial_job_spec=copy.deepcopy(custom_job.job_spec), - encryption_spec= initializer.global_config.get_encryption_spec( - encryption_spec_key_name = encryption_spec_key_name - ) + display_name=display_name, + study_spec=study_spec, + max_trial_count=max_trial_count, + parallel_trial_count=parallel_trial_count, + max_failed_trial_count=max_failed_trial_count, + trial_job_spec=copy.deepcopy(custom_job.job_spec), + encryption_spec=initializer.global_config.get_encryption_spec( + encryption_spec_key_name=encryption_spec_key_name + ), ) @base.optional_sync() @@ -1037,9 +1084,10 @@ def run( self, service_account: Optional[str] = None, network: Optional[str] = None, - timeout: Optional[int] = None, # seconds - restart_job_on_worker_restart: bool=False, - sync: bool = True) -> None: + timeout: Optional[int] = None, # seconds + restart_job_on_worker_restart: bool = False, + sync: bool = True, + ) -> None: if service_account: self._gca_resource.trial_job_spec.service_account = service_account @@ -1047,29 +1095,25 @@ def run( if network: self._gca_resource.trial_job_spec.network = network - if timeout or restart_job_on_worker_restart: timout = duration_pb2.Duration(seconds=timout) if timeout else None self._gca_resource.trial_job_spec.scheduling = gca_custom_job_compat.Scheduling( - timeout=timeout, - restart_job_on_worker_restart=restart_job_on_worker_restart - ) + timeout=timeout, + restart_job_on_worker_restart=restart_job_on_worker_restart, + ) _LOGGER.log_create_with_lro(self.__class__) self._gca_resource = self.api_client.create_hyperparameter_tuning_job( - parent=self._parent, - hyperparameter_tuning_job=self._gca_resource - ) + parent=self._parent, hyperparameter_tuning_job=self._gca_resource + ) _LOGGER.log_create_complete(self.__class__, self._gca_resource, "hpt_job") - _LOGGER.info( - "View HyperparameterTuningJob:\n%s" % self._dashboard_uri() - ) - + _LOGGER.info("View HyperparameterTuningJob:\n%s" % self._dashboard_uri()) self._block_until_complete() - - + @property + def trials(self) -> List[gca_study_compat.Trial]: + return list(self._gca_resource.trials) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index f8f56bd5da..0121c585f5 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -38,8 +38,8 @@ training_pipeline as gca_training_pipeline, ) from google.cloud.aiplatform.utils import _timestamped_gcs_dir -from google.cloud.aiplatform.utils.source_utils import _TrainingScriptPythonPackager -from google.cloud.aiplatform.utils.worker_spec_utils import _DistributedTrainingSpec +from google.cloud.aiplatform.utils import source_utils +from google.cloud.aiplatform.utils import worker_spec_utils from google.cloud.aiplatform.v1.schema.trainingjob import 
( definition_v1 as training_job_inputs, @@ -1005,7 +1005,7 @@ def _prepare_and_validate_run( machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", accelerator_count: int = 0, - ) -> Tuple[_DistributedTrainingSpec, Optional[gca_model.Model]]: + ) -> Tuple[worker_spec_utils._DistributedTrainingSpec, Optional[gca_model.Model]]: """Create worker pool specs and managed model as well validating the run. @@ -1055,7 +1055,7 @@ def _prepare_and_validate_run( model_display_name = model_display_name or self._display_name + "-model" # validates args and will raise - worker_pool_specs = _DistributedTrainingSpec.chief_worker_pool( + worker_pool_specs = worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -1073,7 +1073,7 @@ def _prepare_and_validate_run( def _prepare_training_task_inputs_and_output_dir( self, - worker_pool_specs: _DistributedTrainingSpec, + worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, @@ -1081,7 +1081,7 @@ def _prepare_training_task_inputs_and_output_dir( """Prepares training task inputs and output directory for custom job. Args: - worker_pools_spec (_DistributedTrainingSpec): + worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): Worker pools pecs required to run job. base_output_dir (str): GCS output directory of job. If not provided a @@ -1106,12 +1106,12 @@ def _prepare_training_task_inputs_and_output_dir( _LOGGER.info("Training Output directory:\n%s " % base_output_dir) training_task_inputs = { - "workerPoolSpecs": worker_pool_specs, - "baseOutputDirectory": {"output_uri_prefix": base_output_dir}, + "worker_pool_specs": worker_pool_specs, + "base_output_directory": {"output_uri_prefix": base_output_dir}, } if service_account: - training_task_inputs["serviceAccount"] = service_account + training_task_inputs["service_account"] = service_account if network: training_task_inputs["network"] = network @@ -1531,7 +1531,7 @@ def run( ) # make and copy package - python_packager = _TrainingScriptPythonPackager( + python_packager = source_utils._TrainingScriptPythonPackager( script_path=self._script_path, requirements=self._requirements ) @@ -1557,7 +1557,7 @@ def run( @base.optional_sync(construct_object_on_arg="managed_model") def _run( self, - python_packager: _TrainingScriptPythonPackager, + python_packager: source_utils._TrainingScriptPythonPackager, dataset: Optional[ Union[ datasets.ImageDataset, @@ -1567,7 +1567,7 @@ def _run( ] ], annotation_schema_uri: Optional[str], - worker_pool_specs: _DistributedTrainingSpec, + worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, managed_model: Optional[gca_model.Model] = None, args: Optional[List[Union[str, float, int]]] = None, environment_variables: Optional[Dict[str, str]] = None, @@ -1584,7 +1584,7 @@ def _run( """Packages local script and launches training_job. Args: - python_packager (_TrainingScriptPythonPackager): + python_packager (source_utils._TrainingScriptPythonPackager): Required. Python Packager pointing to training script locally. dataset ( Union[ @@ -1598,7 +1598,7 @@ def _run( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. - worker_pools_spec (_DistributedTrainingSpec): + worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): Worker pools pecs required to run job. 
managed_model (gca_model.Model): Model proto if this script produces a Managed Model. @@ -1682,17 +1682,17 @@ def _run( ) for spec in worker_pool_specs: - spec["pythonPackageSpec"] = { - "executorImageUri": self._container_uri, - "pythonModule": python_packager.module_name, - "packageUris": [package_gcs_uri], + spec["python_package_spec"] = { + "executor_image_uri": self._container_uri, + "python_module": python_packager.module_name, + "package_uris": [package_gcs_uri], } if args: - spec["pythonPackageSpec"]["args"] = args + spec["python_package_spec"]["args"] = args if environment_variables: - spec["pythonPackageSpec"]["env"] = [ + spec["python_package_spec"]["env"] = [ {"name": key, "value": value} for key, value in environment_variables.items() ] @@ -2146,7 +2146,7 @@ def _run( ] ], annotation_schema_uri: Optional[str], - worker_pool_specs: _DistributedTrainingSpec, + worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, managed_model: Optional[gca_model.Model] = None, args: Optional[List[Union[str, float, int]]] = None, environment_variables: Optional[Dict[str, str]] = None, @@ -2174,7 +2174,7 @@ def _run( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. - worker_pools_spec (_DistributedTrainingSpec): + worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): Worker pools pecs required to run job. managed_model (gca_model.Model): Model proto if this script produces a Managed Model. @@ -3943,7 +3943,7 @@ def _run( ] ], annotation_schema_uri: Optional[str], - worker_pool_specs: _DistributedTrainingSpec, + worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, managed_model: Optional[gca_model.Model] = None, args: Optional[List[Union[str, float, int]]] = None, environment_variables: Optional[Dict[str, str]] = None, @@ -3972,7 +3972,7 @@ def _run( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. - worker_pools_spec (_DistributedTrainingSpec): + worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): Worker pools pecs required to run job. managed_model (gca_model.Model): Model proto if this script produces a Managed Model. @@ -4035,17 +4035,17 @@ def _run( produce an AI Platform Model. 
""" for spec in worker_pool_specs: - spec["pythonPackageSpec"] = { - "executorImageUri": self._container_uri, - "pythonModule": self._python_module, - "packageUris": [self._package_gcs_uri], + spec["python_package_spec"] = { + "executor_image_uri": self._container_uri, + "python_module": self._python_module, + "package_uris": [self._package_gcs_uri], } if args: - spec["pythonPackageSpec"]["args"] = args + spec["python_package_spec"]["args"] = args if environment_variables: - spec["pythonPackageSpec"]["env"] = [ + spec["python_package_spec"]["env"] = [ {"name": key, "value": value} for key, value in environment_variables.items() ] diff --git a/google/cloud/aiplatform/training_utils.py b/google/cloud/aiplatform/training_utils.py index 48af524e3f..95e4d2429a 100644 --- a/google/cloud/aiplatform/training_utils.py +++ b/google/cloud/aiplatform/training_utils.py @@ -106,8 +106,8 @@ def tf_config(self) -> Optional[Dict]: return None -_DEFAULT_HYPERPARAMETER_METRIC_TAG = 'training/hptuning/metric' -_DEFAULT_METRIC_PATH = '/tmp/hypertune/output.metrics' +_DEFAULT_HYPERPARAMETER_METRIC_TAG = "training/hptuning/metric" +_DEFAULT_METRIC_PATH = "/tmp/hypertune/output.metrics" # TODO(0olwzo0): consider to make it configurable _MAX_NUM_METRIC_ENTRIES_TO_PRESERVE = 100 @@ -122,29 +122,33 @@ def initialize(cls): if cls.initialized: return - cls.metric_path = os.environ.get('CLOUD_ML_HP_METRIC_FILE', - _DEFAULT_METRIC_PATH) + cls.metric_path = os.environ.get( + "CLOUD_ML_HP_METRIC_FILE", _DEFAULT_METRIC_PATH + ) if not os.path.exists(os.path.dirname(cls.metric_path)): os.makedirs(os.path.dirname(cls.metric_path)) - cls.trial_id = os.environ.get('CLOUD_ML_TRIAL_ID', 0) + cls.trial_id = os.environ.get("CLOUD_ML_TRIAL_ID", 0) cls.metrics_queue = collections.deque( - maxlen=_MAX_NUM_METRIC_ENTRIES_TO_PRESERVE) + maxlen=_MAX_NUM_METRIC_ENTRIES_TO_PRESERVE + ) cls.initialized = True @classmethod def _dump_metrics_to_file(cls): - with open(cls.metric_path, 'w') as metric_file: + with open(cls.metric_path, "w") as metric_file: for metric in cls.metrics_queue: - metric_file.write(json.dumps(metric, sort_keys=True) + '\n') + metric_file.write(json.dumps(metric, sort_keys=True) + "\n") @classmethod - def report_hyperparameter_tuning_metric(cls, - hyperparameter_metric_tag, - metric_value, - global_step=None, - checkpoint_path=''): + def report_hyperparameter_tuning_metric( + cls, + hyperparameter_metric_tag, + metric_value, + global_step=None, + checkpoint_path="", + ): """Method to report hyperparameter tuning metric. 
Args: hyperparameter_metric_tag: The hyperparameter metric name this metric @@ -159,33 +163,25 @@ def report_hyperparameter_tuning_metric(cls, if hyperparameter_metric_tag: metric_tag = hyperparameter_metric_tag metric_body = { - 'timestamp': time.time(), - 'trial': str(cls.trial_id), + "timestamp": time.time(), + "trial": str(cls.trial_id), metric_tag: str(metric_value), - 'global_step': str(int(global_step) if global_step else 0), - 'checkpoint_path': checkpoint_path + "global_step": str(int(global_step) if global_step else 0), + "checkpoint_path": checkpoint_path, } cls.metrics_queue.append(metric_body) cls._dump_metrics_to_file() def report_hyperparameter_tuning_metrics( - metrics: Dict[str, float], - global_step: Optional[int] = None, - checkpoint_path='' - ): - _HyperparameterTuningJobReporterSingleton.initialize() - - for hyperparameter_metric_tag, metric_value in metrics.items(): - _HyperparameterTuningJobReporterSingleton.report_hyperparameter_tuning_metric( - hyperparameter_metric_tag=hyperparameter_metric_tag, - metric_value=metric_value, - global_step=global_step, - checkpoint_path=checkpoint_path - ) - - - - - - + metrics: Dict[str, float], global_step: Optional[int] = None, checkpoint_path="" +): + _HyperparameterTuningJobReporterSingleton.initialize() + + for hyperparameter_metric_tag, metric_value in metrics.items(): + _HyperparameterTuningJobReporterSingleton.report_hyperparameter_tuning_metric( + hyperparameter_metric_tag=hyperparameter_metric_tag, + metric_value=metric_value, + global_step=global_step, + checkpoint_path=checkpoint_path, + ) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index c847a56244..22a4d985bb 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -565,4 +565,4 @@ def _timestamped_copy_to_gcs( blob.upload_from_filename(local_file_path) gcs_path = "".join(["gs://", "/".join([blob.bucket.name, blob.name])]) - return gcs_path \ No newline at end of file + return gcs_path diff --git a/google/cloud/aiplatform/utils/source_utils.py b/google/cloud/aiplatform/utils/source_utils.py index 8539e3122d..f84e37b52a 100644 --- a/google/cloud/aiplatform/utils/source_utils.py +++ b/google/cloud/aiplatform/utils/source_utils.py @@ -213,4 +213,4 @@ def package_and_copy_to_gcs( project=project, credentials=credentials, ) - return self.package_and_copy(copy_method=copy_method) \ No newline at end of file + return self.package_and_copy(copy_method=copy_method) diff --git a/google/cloud/aiplatform/utils/worker_spec_utils.py b/google/cloud/aiplatform/utils/worker_spec_utils.py index a23b997f48..ba81352d73 100644 --- a/google/cloud/aiplatform/utils/worker_spec_utils.py +++ b/google/cloud/aiplatform/utils/worker_spec_utils.py @@ -1,7 +1,9 @@ from typing import NamedTuple, Optional, Dict, Union, List from google.cloud.aiplatform import utils -from google.cloud.aiplatform.compat.types import accelerator_type as gca_accelerator_type_compat +from google.cloud.aiplatform.compat.types import ( + accelerator_type as gca_accelerator_type_compat, +) class _MachineSpec(NamedTuple): @@ -178,4 +180,4 @@ def chief_worker_pool( accelerator_type=accelerator_type, ) - return cls(chief_spec=chief_spec, worker_spec=worker_spec) \ No newline at end of file + return cls(chief_spec=chief_spec, worker_spec=worker_spec) diff --git a/tests/unit/aiplatform/test_end_to_end.py b/tests/unit/aiplatform/test_end_to_end.py index f4b1355679..e4a50b4014 100644 --- a/tests/unit/aiplatform/test_end_to_end.py 
+++ b/tests/unit/aiplatform/test_end_to_end.py @@ -19,7 +19,7 @@ from importlib import reload -import google.cloud.aiplatform.utils.source_utils +from google.cloud.aiplatform.utils import source_utils from google.cloud import aiplatform from google.cloud.aiplatform import initializer from google.cloud.aiplatform import models @@ -205,16 +205,16 @@ def test_dataset_create_to_model_predict( true_args = test_training_jobs._TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": test_training_jobs._TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": test_training_jobs._TEST_MACHINE_TYPE, - "acceleratorType": test_training_jobs._TEST_ACCELERATOR_TYPE, - "acceleratorCount": test_training_jobs._TEST_ACCELERATOR_COUNT, + "replica_count": test_training_jobs._TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": test_training_jobs._TEST_MACHINE_TYPE, + "accelerator_type": test_training_jobs._TEST_ACCELERATOR_TYPE, + "accelerator_count": test_training_jobs._TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, } @@ -249,8 +249,8 @@ def test_dataset_create_to_model_predict( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": { + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { "output_uri_prefix": test_training_jobs._TEST_BASE_OUTPUT_DIR }, }, @@ -386,16 +386,16 @@ def test_dataset_create_to_model_predict_with_pipeline_fail( true_args = test_training_jobs._TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": test_training_jobs._TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": test_training_jobs._TEST_MACHINE_TYPE, - "acceleratorType": test_training_jobs._TEST_ACCELERATOR_TYPE, - "acceleratorCount": test_training_jobs._TEST_ACCELERATOR_COUNT, + "replica_count": test_training_jobs._TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": test_training_jobs._TEST_MACHINE_TYPE, + "accelerator_type": test_training_jobs._TEST_ACCELERATOR_TYPE, + "accelerator_count": test_training_jobs._TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": google.cloud.aiplatform.utils.source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [test_training_jobs._TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, } @@ -431,8 +431,8 @@ def test_dataset_create_to_model_predict_with_pipeline_fail( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": { + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { "output_uri_prefix": 
test_training_jobs._TEST_BASE_OUTPUT_DIR }, }, diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index b4745d29f1..75478263e8 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -515,7 +515,7 @@ def mock_model_service_get(): @pytest.fixture def mock_python_package_to_gcs(): with mock.patch.object( - source_utils._TrainingScriptPythonPackager, "package_and_copy_to_gcs" + source_utils._TrainingScriptPythonPackager, "package_and_copy_to_gcs" ) as mock_package_to_copy_gcs: mock_package_to_copy_gcs.return_value = _TEST_OUTPUT_PYTHON_PACKAGE_PATH yield mock_package_to_copy_gcs @@ -633,16 +633,16 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( ] true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, }, @@ -702,9 +702,11 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, - "serviceAccount": _TEST_SERVICE_ACCOUNT, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, + "service_account": _TEST_SERVICE_ACCOUNT, "network": _TEST_NETWORK, }, struct_pb2.Value(), @@ -792,16 +794,16 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( ] true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, }, @@ -861,8 +863,10 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + 
"base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -1067,16 +1071,16 @@ def test_run_call_pipeline_service_create_with_no_dataset( ] true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, }, @@ -1097,8 +1101,10 @@ def test_run_call_pipeline_service_create_with_no_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -1319,31 +1325,31 @@ def test_run_call_pipeline_service_create_distributed_training( true_worker_pool_spec = [ { - "replicaCount": 1, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": 1, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, }, }, { - "replicaCount": 9, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": 9, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, }, @@ -1385,8 +1391,10 @@ def test_run_call_pipeline_service_create_distributed_training( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": true_worker_pool_spec, - "baseOutputDirectory": {"output_uri_prefix": 
_TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": true_worker_pool_spec, + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -1547,16 +1555,16 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": source_utils._TrainingScriptPythonPackager.module_name, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": source_utils._TrainingScriptPythonPackager.module_name, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, } @@ -1612,8 +1620,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -1787,11 +1797,11 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( ] true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, @@ -1855,8 +1865,10 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -1935,11 +1947,11 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, @@ -2002,8 +2014,10 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": 
_TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -2192,11 +2206,11 @@ def test_run_call_pipeline_service_create_with_no_dataset( true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, @@ -2220,8 +2234,10 @@ def test_run_call_pipeline_service_create_with_no_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -2421,11 +2437,11 @@ def test_run_call_pipeline_service_create_distributed_training( true_worker_pool_spec = [ { - "replicaCount": 1, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": 1, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, @@ -2434,11 +2450,11 @@ def test_run_call_pipeline_service_create_distributed_training( }, }, { - "replicaCount": 9, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": 9, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, @@ -2483,8 +2499,10 @@ def test_run_call_pipeline_service_create_distributed_training( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": true_worker_pool_spec, - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": true_worker_pool_spec, + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -2561,11 +2579,11 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, @@ -2625,9 +2643,11 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": 
[true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, - "serviceAccount": _TEST_SERVICE_ACCOUNT, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, + "service_account": _TEST_SERVICE_ACCOUNT, "network": _TEST_NETWORK, }, struct_pb2.Value(), @@ -2700,12 +2720,12 @@ def test_machine_spec_return_spec_dict(self): ) true_spec_dict = { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": _TEST_REPLICA_COUNT, + "replica_count": _TEST_REPLICA_COUNT, } assert test_spec.spec_dict == true_spec_dict @@ -2719,8 +2739,8 @@ def test_machine_spec_return_spec_dict_with_no_accelerator(self): ) true_spec_dict = { - "machineSpec": {"machineType": _TEST_MACHINE_TYPE}, - "replicaCount": _TEST_REPLICA_COUNT, + "machine_spec": {"machine_type": _TEST_MACHINE_TYPE}, + "replica_count": _TEST_REPLICA_COUNT, } assert test_spec.spec_dict == true_spec_dict @@ -2789,36 +2809,36 @@ def test_machine_spec_returns_pool_spec(self): true_pool_spec = [ { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 1, + "replica_count": 1, }, { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 10, + "replica_count": 10, }, { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 3, + "replica_count": 3, }, { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 1, + "replica_count": 1, }, ] @@ -2835,20 +2855,20 @@ def test_chief_worker_pool_returns_spec(self): true_pool_spec = [ { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 1, + "replica_count": 1, }, { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 9, + "replica_count": 9, }, ] @@ -2865,12 +2885,12 @@ def 
test_chief_worker_pool_returns_just_chief(self): true_pool_spec = [ { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 1, + "replica_count": 1, } ] @@ -2911,21 +2931,21 @@ def test_machine_spec_handles_missing_pools(self): true_pool_spec = [ { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 1, + "replica_count": 1, }, - {"machineSpec": {"machineType": "n1-standard-4"}, "replicaCount": 0}, + {"machine_spec": {"machine_type": "n1-standard-4"}, "replica_count": 0}, { - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "replicaCount": 3, + "replica_count": 3, }, ] @@ -3002,16 +3022,16 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( ] true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": _TEST_PYTHON_MODULE_NAME, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": _TEST_PYTHON_MODULE_NAME, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, "env": true_env, }, @@ -3071,9 +3091,11 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, - "serviceAccount": _TEST_SERVICE_ACCOUNT, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, + "service_account": _TEST_SERVICE_ACCOUNT, "network": _TEST_NETWORK, }, struct_pb2.Value(), @@ -3155,16 +3177,16 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": _TEST_PYTHON_MODULE_NAME, - "packageUris": 
[_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": _TEST_PYTHON_MODULE_NAME, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, } @@ -3223,8 +3245,10 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -3304,16 +3328,16 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": _TEST_PYTHON_MODULE_NAME, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": _TEST_PYTHON_MODULE_NAME, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, } @@ -3372,8 +3396,10 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -3566,16 +3592,16 @@ def test_run_call_pipeline_service_create_with_no_dataset( true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": _TEST_PYTHON_MODULE_NAME, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": _TEST_PYTHON_MODULE_NAME, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, } @@ -3595,8 +3621,10 @@ def test_run_call_pipeline_service_create_with_no_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -3802,30 +3830,30 @@ def 
test_run_call_pipeline_service_create_distributed_training( true_worker_pool_spec = [ { - "replicaCount": 1, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": 1, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": _TEST_PYTHON_MODULE_NAME, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": _TEST_PYTHON_MODULE_NAME, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, }, { - "replicaCount": 9, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": 9, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": _TEST_PYTHON_MODULE_NAME, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": _TEST_PYTHON_MODULE_NAME, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, }, @@ -3866,8 +3894,10 @@ def test_run_call_pipeline_service_create_distributed_training( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": true_worker_pool_spec, - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": true_worker_pool_spec, + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), @@ -3943,16 +3973,16 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_args = _TEST_RUN_ARGS true_worker_pool_spec = { - "replicaCount": _TEST_REPLICA_COUNT, - "machineSpec": { - "machineType": _TEST_MACHINE_TYPE, - "acceleratorType": _TEST_ACCELERATOR_TYPE, - "acceleratorCount": _TEST_ACCELERATOR_COUNT, + "replica_count": _TEST_REPLICA_COUNT, + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + "accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, }, - "pythonPackageSpec": { - "executorImageUri": _TEST_TRAINING_CONTAINER_IMAGE, - "pythonModule": _TEST_PYTHON_MODULE_NAME, - "packageUris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], + "python_package_spec": { + "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "python_module": _TEST_PYTHON_MODULE_NAME, + "package_uris": [_TEST_OUTPUT_PYTHON_PACKAGE_PATH], "args": true_args, }, } @@ -4008,8 +4038,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { - "workerPoolSpecs": [true_worker_pool_spec], - "baseOutputDirectory": {"output_uri_prefix": _TEST_BASE_OUTPUT_DIR}, + "worker_pool_specs": [true_worker_pool_spec], + "base_output_directory": { + "output_uri_prefix": _TEST_BASE_OUTPUT_DIR + }, }, struct_pb2.Value(), ), From c9259bf24bb58793ffa94655087503773e6c9fe7 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 16:56:42 -0400 Subject: [PATCH 11/29] 
chore: lint
---
 google/cloud/aiplatform/__init__.py           |   3 +-
 .../cloud/aiplatform/hyperparameter_tuning.py |  10 +-
 google/cloud/aiplatform/jobs.py               | 106 +++++++++++++++---
 google/cloud/aiplatform/training_jobs.py      |   1 -
 tests/unit/aiplatform/test_end_to_end.py      |   1 -
 5 files changed, 99 insertions(+), 22 deletions(-)

diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py
index 32ec7538e0..d544b4bc8f 100644
--- a/google/cloud/aiplatform/__init__.py
+++ b/google/cloud/aiplatform/__init__.py
@@ -66,7 +66,8 @@
     "explain",
     "gapic",
     "init",
-    "hyperparameter_tuning" "log_params",
+    "hyperparameter_tuning",
+    "log_params",
     "log_metrics",
     "get_experiment_df",
     "get_pipeline_df",
diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py
index 5a2ef35fda..de9ecac84e 100644
--- a/google/cloud/aiplatform/hyperparameter_tuning.py
+++ b/google/cloud/aiplatform/hyperparameter_tuning.py
@@ -15,7 +15,7 @@ class _ParameterSpec(metaclass=abc.ABCMeta):
     def __init__(
         self,
-        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
@@ -80,7 +80,7 @@ def __init__(
         min: float,
         max: float,
         scale: str,
-        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
@@ -107,7 +107,7 @@ def __init__(
         min: int,
         max: int,
         scale: str,
-        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
@@ -132,7 +132,7 @@ class CategoricalValueSpec(_ParameterSpec):
     def __init__(
         self,
         values: List[str],
-        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
@@ -156,7 +156,7 @@ def __init__(
         self,
         values: List[float],
         scale: str,
-        conditional_parameter_spec: Optional[Dict[str, "_Parameter"]] = None,
+        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 25f065f502..b96f4b3d0f 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -777,12 +777,22 @@ def iter_outputs(
 
 
 class _RunnableJob(_Job):
+    """ABC to interface a job as a runnable training class."""
+
     def __init__(
         self,
         project: Optional[str] = None,
         location: Optional[str] = None,
         credentials: Optional[auth_credentials.Credentials] = None,
     ):
+        """Initializes job with project, location, and api_client.
+
+        Args:
+            project(str): Project of the resource noun.
+            location(str): The location of the resource noun.
+            credentials(google.auth.credentials.Credentials): Optional custom
+                credentials to use when interacting with the resource noun.
+ """ base.AiPlatformResourceNounWithFutureManager.__init__( self, project=project, location=location, credentials=credentials @@ -798,10 +808,16 @@ def run(self) -> None: @property def _has_run(self) -> bool: + """Property returns true if this class has a resource name.""" return bool(getattr(self._gca_resource, "name")) @property def state(self) -> gca_job_state.JobState: + """Current state of job. + + Raises: + RuntimeError if job run has not been called. + """ if not self._has_run: raise RuntimeError("Job has not run. No state available.") @@ -845,25 +861,93 @@ def get( return self +class DataLabelingJob(_Job): + _resource_noun = "dataLabelingJobs" + _getter_method = "get_data_labeling_job" + _list_method = "list_data_labeling_jobs" + _cancel_method = "cancel_data_labeling_job" + _delete_method = "delete_data_labeling_job" + _job_type = "labeling-tasks" + pass + + class CustomJob(_RunnableJob): + """Creates an AI Platform (Unified) Custom Job.""" + _resource_noun = "customJobs" _getter_method = "get_custom_job" _list_method = "list_custom_job" _cancel_method = "cancel_custom_job" _delete_method = "delete_custom_job" _job_type = "training" - pass + def __init__( self, display_name: str, - worker_pool_specs: Union[Dict], + worker_pool_specs: Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]], project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, ): + """Cosntruct a Custom Job with Worker Pool Specs. + + Example usage: + worker_pool_specs = [ + { + "machine_spec": { + "machine_type": "n1-standard-4", + "accelerator_type": "NVIDIA_TESLA_K80", + "accelerator_count": 1, + }, + "replica_count": 1, + "container_spec": { + "image_uri": container_image_uri, + "command": [], + "args": [], + }, + } + ] + + my_job = aiplatform.CustomJob( + display_name='my_job', + worker_pool_specs=worker_pool_specs + ) + + my_job.run() + + + For more information on configuring worker pool specs please visit: + https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job + + + Args: + display_name (str): Required. The user-defined name of this Custom Job. + worker_pool_specs (Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]]): + Required. The spec of the worker pools including machine type and Docker image. + Can provided as a list of dictionaries or list of WorkerPoolSpec proto messages. + project (str): + Project to run the custom job in. Overrides project set in aiplatform.init. + location (str): + Location to run the custom job in. Overrides location set in aiplatform.init. + credentials (auth_credentials.Credentials): + Custom credentials to use to run call custom job service. Overrides + credentials set in aiplatform.init. + encryption_spec_key_name (str): + Customer-managed encryption key name for a + CustomJob. If this is set, then all resources + created by the CustomJob will be encrypted with + the provided encryption key. + staging_bucket (str): + Bucket for produced custom job artifacts. Overrides + staging_bucket set in aiplatform.init. + + Raises: + RuntimeError is not staging bucket was set using aiplatfrom.init and a staging + bucket was not passed in. 
+ """ super().__init__(project=project, location=location, credentials=credentials) @@ -907,6 +991,9 @@ def from_local_script( credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, ) -> "CustomJob": + """Configures a custom job from a local script. + + """ project = project or initializer.global_config.project location = location or initializer.global_config.location @@ -976,7 +1063,7 @@ def run( self._gca_resource.network = network if timeout or restart_job_on_worker_restart: - timout = duration_pb2.Duration(seconds=timout) if timeout else None + timeout = duration_pb2.Duration(seconds=timeout) if timeout else None self._gca_resource.job_spec.scheduling = gca_custom_job_compat.Scheduling( timeout=timeout, restart_job_on_worker_restart=restart_job_on_worker_restart, @@ -999,16 +1086,6 @@ def job_spec(self): return self._gca_resource.job_spec -class DataLabelingJob(_Job): - _resource_noun = "dataLabelingJobs" - _getter_method = "get_data_labeling_job" - _list_method = "list_data_labeling_jobs" - _cancel_method = "cancel_data_labeling_job" - _delete_method = "delete_data_labeling_job" - _job_type = "labeling-tasks" - pass - - _SEARCH_ALGORITHM_TO_PROTO_VALUE = { "random": gca_study_compat.StudySpec.Algorithm.RANDOM_SEARCH, "grid": gca_study_compat.StudySpec.Algorithm.GRID_SEARCH, @@ -1020,6 +1097,7 @@ class DataLabelingJob(_Job): } + class HyperparameterTuningJob(_RunnableJob): _resource_noun = "hyperparameterTuningJobs" _getter_method = "get_hyperparameter_tuning_job" @@ -1096,7 +1174,7 @@ def run( self._gca_resource.trial_job_spec.network = network if timeout or restart_job_on_worker_restart: - timout = duration_pb2.Duration(seconds=timout) if timeout else None + timeout = duration_pb2.Duration(seconds=timeout) if timeout else None self._gca_resource.trial_job_spec.scheduling = gca_custom_job_compat.Scheduling( timeout=timeout, restart_job_on_worker_restart=restart_job_on_worker_restart, diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 0121c585f5..470e30bf56 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -15,7 +15,6 @@ # limitations under the License. # -import sys import time from typing import Dict, List, Optional, Sequence, Tuple, Union diff --git a/tests/unit/aiplatform/test_end_to_end.py b/tests/unit/aiplatform/test_end_to_end.py index e4a50b4014..4aede65f08 100644 --- a/tests/unit/aiplatform/test_end_to_end.py +++ b/tests/unit/aiplatform/test_end_to_end.py @@ -24,7 +24,6 @@ from google.cloud.aiplatform import initializer from google.cloud.aiplatform import models from google.cloud.aiplatform import schema -from google.cloud.aiplatform import training_jobs from google.cloud.aiplatform_v1.types import ( dataset as gca_dataset, From df97a2d0250624b8b8104bfa5283e7a390dbed04 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Fri, 14 May 2021 19:30:09 -0400 Subject: [PATCH 12/29] checkpoint --- google/cloud/aiplatform/jobs.py | 85 ++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 7 deletions(-) diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index b96f4b3d0f..9e58f5d1c4 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -929,19 +929,19 @@ def __init__( Required. The spec of the worker pools including machine type and Docker image. Can provided as a list of dictionaries or list of WorkerPoolSpec proto messages. 
            project (str):
-                Project to run the custom job in. Overrides project set in aiplatform.init.
+                Optional. Project to run the custom job in. Overrides project set in aiplatform.init.
            location (str):
-                Location to run the custom job in. Overrides location set in aiplatform.init.
+                Optional. Location to run the custom job in. Overrides location set in aiplatform.init.
            credentials (auth_credentials.Credentials):
-                Custom credentials to use to run call custom job service. Overrides
+                Optional. Custom credentials to use to call the custom job service. Overrides
                credentials set in aiplatform.init.
            encryption_spec_key_name (str):
-                Customer-managed encryption key name for a
+                Optional. Customer-managed encryption key name for a
                CustomJob. If this is set, then all resources
                created by the CustomJob will be encrypted with
                the provided encryption key.
            staging_bucket (str):
-                Bucket for produced custom job artifacts. Overrides
+                Optional. Bucket for produced custom job artifacts. Overrides
                staging_bucket set in aiplatform.init.
@@ -987,13 +987,62 @@ def from_local_script(
         accelerator_count: int = 0,
         project: Optional[str] = None,
         location: Optional[str] = None,
-        staging_bucket: Optional[str] = None,
         credentials: Optional[auth_credentials.Credentials] = None,
         encryption_spec_key_name: Optional[str] = None,
+        staging_bucket: Optional[str] = None,
     ) -> "CustomJob":
         """Configures a custom job from a local script.

-        """
+        Args:
+            display_name (str):
+                Required. The user-defined name of this CustomJob.
+            script_path (str): Required. Local path to the training script.
+            container_uri (str):
+                Required. URI of the training container image to use for the custom job.
+            requirements (Sequence[str]):
+                Optional. List of Python package dependencies of the script.
+            environment_variables (Dict[str, str]):
+                Optional. Environment variables to be passed to the container.
+                Should be a dictionary where keys are environment variable names
+                and values are environment variable values for those names.
+                At most 10 environment variables can be specified.
+                The name of each environment variable must be unique.
+
+                environment_variables = {
+                    'MY_KEY': 'MY_VALUE'
+                }
+            replica_count (int):
+                Optional. The number of worker replicas. If replica_count = 1 then one chief
+                replica will be provisioned. If replica_count > 1 the remainder will be
+                provisioned as a worker replica pool.
+            machine_type (str):
+                Optional. The type of machine to use for training.
+            accelerator_type (str):
+                Optional. Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED,
+                NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4,
+                NVIDIA_TESLA_T4
+            accelerator_count (int):
+                Optional. The number of accelerators to attach to a worker replica.
+            project (str):
+                Optional. Project to run the custom job in. Overrides project set in aiplatform.init.
+            location (str):
+                Optional. Location to run the custom job in. Overrides location set in aiplatform.init.
+            credentials (auth_credentials.Credentials):
+                Optional. Custom credentials to use to call the custom job service. Overrides
+                credentials set in aiplatform.init.
+            encryption_spec_key_name (str):
+                Optional. Customer-managed encryption key name for a
+                CustomJob. If this is set, then all resources
+                created by the CustomJob will be encrypted with
+                the provided encryption key.
+            staging_bucket (str):
+                Optional. Bucket for produced custom job artifacts. Overrides
+                staging_bucket set in aiplatform.init.
+ + Raises: + RuntimeError is not staging bucket was set using aiplatfrom.init and a staging + bucket was not passed in. + """ project = project or initializer.global_config.project location = location or initializer.global_config.location @@ -1055,6 +1104,28 @@ def run( restart_job_on_worker_restart: bool = False, sync: bool = True, ) -> None: + """Run this configured CustomJob. + + Args: + service_account (str): + Optional. Specifies the service account for workload run-as account. + Users submitting jobs must have act-as permission on this run-as account. + network (str): + Optional. The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Private services access must already be configured for the network. + If left unspecified, the job is not peered with any network. + timeout (int): + The maximum job running time in seconds. The default is 7 days. + restart_job_on_worker_restart (bool): + Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by + distributed training jobs that are not resilient + to workers leaving and joining a job. + sync (bool): + Whether to execute this method synchronously. If False, this method + will unblock and it will be executed in a concurrent Future. + """ if service_account: self._gca_resource.service_account = service_account From 23b3249d61876cd724e1e7f676502483713bddf5 Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Mon, 17 May 2021 15:58:52 -0400 Subject: [PATCH 13/29] feat: add custom job and hp tuning job tests --- google/cloud/aiplatform/base.py | 23 ++ .../cloud/aiplatform/hyperparameter_tuning.py | 101 ++++- google/cloud/aiplatform/jobs.py | 145 ++++++- google/cloud/aiplatform/utils/source_utils.py | 17 + .../aiplatform/utils/worker_spec_utils.py | 16 + tests/unit/aiplatform/test_custom_job.py | 323 +++++++++++++++ .../test_hyperparametertuning_job.py | 369 ++++++++++++++++++ 7 files changed, 967 insertions(+), 27 deletions(-) create mode 100644 tests/unit/aiplatform/test_custom_job.py create mode 100644 tests/unit/aiplatform/test_hyperparametertuning_job.py diff --git a/google/cloud/aiplatform/base.py b/google/cloud/aiplatform/base.py index b42b630f75..a9fcef24bd 100644 --- a/google/cloud/aiplatform/base.py +++ b/google/cloud/aiplatform/base.py @@ -101,6 +101,29 @@ def log_create_complete( f"{variable_name} = aiplatform.{cls.__name__}('{resource.name}')" ) + def log_create_complete_with_getter( + self, + cls: Type["AiPlatformResourceNoun"], + resource: proto.Message, + variable_name: str, + ): + """Logs create event is complete. + + Will also include code snippet to instantiate resource in SDK. + + Args: + cls (AiPlatformResourceNoun): + AI Platform Resource Noun class that is being created. + resource (proto.Message): + AI Platform Resourc proto.Message + variable_name (str): Name of variable to use for code snippet + """ + self._logger.info(f"{cls.__name__} created. 
Resource name: {resource.name}") + self._logger.info(f"To use this {cls.__name__} in another session:") + self._logger.info( + f"{variable_name} = aiplatform.{cls.__name__}.get('{resource.name}')" + ) + def log_action_start_against_resource( self, action: str, noun: str, resource_noun_obj: "AiPlatformResourceNoun" ): diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py index de9ecac84e..765de6f422 100644 --- a/google/cloud/aiplatform/hyperparameter_tuning.py +++ b/google/cloud/aiplatform/hyperparameter_tuning.py @@ -1,18 +1,38 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + import abc -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, Union import proto from google.cloud.aiplatform.compat.types import study as gca_study_compat -_scale_type_map = { +_SCALE_TYPE_MAP = { "linear": gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE, "log": gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LOG_SCALE, "reverse_log": gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_REVERSE_LOG_SCALE, + "unspecified": gca_study_compat.StudySpec.ParameterSpec.ScaleType.SCALE_TYPE_UNSPECIFIED, } class _ParameterSpec(metaclass=abc.ABCMeta): + """Base class represents a single parameter to optimize.""" + def __init__( self, conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None, @@ -26,22 +46,26 @@ def __init__( @classmethod @abc.abstractmethod def _proto_parameter_value_class(self) -> proto.Message: + """The proto represenation of this parameter.""" pass @property @classmethod @abc.abstractmethod def _parameter_value_map(self) -> Tuple[Tuple[str, str]]: + """A Tuple map of parameter key to underlying proto key.""" pass @property @classmethod @abc.abstractmethod def _parameter_spec_value_key(self) -> Tuple[Tuple[str, str]]: + """The ParameterSpec key this parameter should be assigned.""" pass @property def _proto_parameter_value_spec(self) -> proto.Message: + """Converts this parameter to it's parameter value representation.""" proto_parameter_value_spec = self._proto_parameter_value_class() for self_attr_key, proto_attr_key in self._parameter_value_map: setattr( @@ -52,10 +76,11 @@ def _proto_parameter_value_spec(self) -> proto.Message: def _to_parameter_spec( self, parameter_id: str ) -> gca_study_compat.StudySpec.ParameterSpec: + """Converts this parameter to ParameterSpec.""" # TODO: Conditional parameters parameter_spec = gca_study_compat.StudySpec.ParameterSpec( parameter_id=parameter_id, - scale_type=_scale_type_map.get(getattr(self, "scale")), + scale_type=_SCALE_TYPE_MAP.get(getattr(self, "scale", "unspecified")), ) setattr( @@ -83,6 +108,21 @@ def __init__( conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None, parent_values: Optional[List[Union[float, int, str]]] = None, ): + """ + Value specification for a parameter in ``DOUBLE`` type. 
+
+        Args:
+            min (float):
+                Required. Inclusive minimum value of the
+                parameter.
+            max (float):
+                Required. Inclusive maximum value of the
+                parameter.
+            scale (str):
+                Required. The type of scaling that should be applied to this parameter.
+
+                Accepts: 'linear', 'log', 'reverse_log'
+        """

         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
@@ -110,59 +150,94 @@ def __init__(
         conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
+        """
+        Value specification for a parameter in ``INTEGER`` type.
+
+        Args:
+            min (int):
+                Required. Inclusive minimum value of the
+                parameter.
+            max (int):
+                Required. Inclusive maximum value of the
+                parameter.
+            scale (str):
+                Required. The type of scaling that should be applied to this parameter.
+
+                Accepts: 'linear', 'log', 'reverse_log'
+        """

         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
-            parent_value=parent_values,
+            parent_values=parent_values,
         )

         self.min = min
-        self.max = (max,)
+        self.max = max
         self.scale = scale


-class CategoricalValueSpec(_ParameterSpec):
+class CategoricalParameterSpec(_ParameterSpec):
     _proto_parameter_value_class = (
         gca_study_compat.StudySpec.ParameterSpec.CategoricalValueSpec
     )
-    _parameter_value_map = ("values", "values")
+    _parameter_value_map = (("values", "values"),)
     _parameter_spec_value_key = "categorical_value_spec"

     def __init__(
         self,
-        values: List[str],
+        values: Sequence[str],
         conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
+        """Value specification for a parameter in ``CATEGORICAL`` type.
+
+        Args:
+            values (Sequence[str]):
+                Required. The list of possible categories.
+        """

         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
-            parent_value=parent_values,
+            parent_values=parent_values,
         )

         self.values = values


-class DiscreteValueSpec(_ParameterSpec):
+class DiscreteParameterSpec(_ParameterSpec):
     _proto_parameter_value_class = (
         gca_study_compat.StudySpec.ParameterSpec.DiscreteValueSpec
     )
-    _parameter_value_map = ("values", "values")
+    _parameter_value_map = (("values", "values"),)
     _parameter_spec_value_key = "discrete_value_spec"

     def __init__(
         self,
-        values: List[float],
+        values: Sequence[float],
         scale: str,
         conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
         parent_values: Optional[List[Union[float, int, str]]] = None,
     ):
+        """Value specification for a parameter in ``DISCRETE`` type.
+
+        Args:
+            values (Sequence[float]):
+                Required. A list of possible values.
+                The list should be in increasing order and at
+                least 1e-10 apart. For instance, this parameter
+                might have possible settings of 1.5, 2.5, and
+                4.0. This list should not contain more than
+                1,000 values.
+            scale (str):
+                Required. The type of scaling that should be applied to this parameter.
+
+                Accepts: 'linear', 'log', 'reverse_log'
+        """

         super().__init__(
             conditional_parameter_spec=conditional_parameter_spec,
-            parent_value=parent_values,
+            parent_values=parent_values,
         )

         self.values = values
diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 9e58f5d1c4..9e36189877 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -184,12 +184,20 @@ def _block_until_complete(self):
         previous_time = current_time
         time.sleep(wait)

-        _LOGGER.log_action_completed_against_resource("run", "completed", self)
-
+        _LOGGER.info(
+            "%s %s current state:\n%s"
+            % (
+                self.__class__.__name__,
+                self._gca_resource.name,
+                self._gca_resource.state,
+            )
+        )
         # Error is only populated when the job state is
         # JOB_STATE_FAILED or JOB_STATE_CANCELLED.
-        if self.state in _JOB_ERROR_STATES:
+        if self._gca_resource.state in _JOB_ERROR_STATES:
             raise RuntimeError("Job failed with:\n%s" % self._gca_resource.error)
+        else:
+            _LOGGER.log_action_completed_against_resource("run", "completed", self)

     @classmethod
     def list(
@@ -809,7 +817,7 @@ def run(self) -> None:
     @property
     def _has_run(self) -> bool:
         """Property returns true if this class has a resource name."""
-        return bool(getattr(self._gca_resource, "name"))
+        return bool(self._gca_resource.name)

     @property
     def state(self) -> gca_job_state.JobState:
@@ -872,8 +880,8 @@ class DataLabelingJob(_Job):


 class CustomJob(_RunnableJob):
-    """Creates an AI Platform (Unified) Custom Job."""
-
+    """AI Platform (Unified) Custom Job."""
+
     _resource_noun = "customJobs"
     _getter_method = "get_custom_job"
     _list_method = "list_custom_job"
@@ -881,7 +889,6 @@ class CustomJob(_RunnableJob):
     _delete_method = "delete_custom_job"
     _job_type = "training"

-
     def __init__(
         self,
         display_name: str,
@@ -924,7 +931,10 @@ def __init__(

         Args:

-            display_name (str): Required. The user-defined name of this Custom Job.
+            display_name (str):
+                Required. The user-defined name of the CustomJob.
+                The name can be up to 128 characters long and can consist
+                of any UTF-8 characters.
             worker_pool_specs (Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]]):
                 Required. The spec of the worker pools including machine type and Docker image.
                 Can be provided as a list of dictionaries or a list of WorkerPoolSpec proto messages.
@@ -1042,7 +1052,7 @@ def from_local_script(
         Raises:
             RuntimeError: If staging_bucket was not set using aiplatform.init and a staging
            bucket was not passed in.
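Taken together, the arguments above translate into a call like the following sketch, in which the project, bucket, script path, and container image are placeholders:

```python
from google.cloud import aiplatform

aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-bucket",  # required here or via the staging_bucket argument
)

# "task.py" stands in for a local training script; it is packaged and
# staged to the bucket, then executed inside the given container image.
job = aiplatform.CustomJob.from_local_script(
    display_name="my-script-job",
    script_path="task.py",
    container_uri="gcr.io/my-project/training-container:latest",
    requirements=["pandas>=1.2"],
    environment_variables={"MY_KEY": "MY_VALUE"},
    replica_count=1,
    machine_type="n1-standard-4",
)

job.run()
```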
-        """
+        """

         project = project or initializer.global_config.project
         location = location or initializer.global_config.location
@@ -1128,10 +1138,10 @@ def run(
         """

         if service_account:
-            self._gca_resource.service_account = service_account
+            self._gca_resource.job_spec.service_account = service_account

         if network:
-            self._gca_resource.network = network
+            self._gca_resource.job_spec.network = network

         if timeout or restart_job_on_worker_restart:
             timeout = duration_pb2.Duration(seconds=timeout) if timeout else None
@@ -1146,7 +1156,9 @@ def run(
             parent=self._parent, custom_job=self._gca_resource
         )

-        _LOGGER.log_create_complete(self.__class__, self._gca_resource, "custom_job")
+        _LOGGER.log_create_complete_with_getter(
+            self.__class__, self._gca_resource, "custom_job"
+        )

         _LOGGER.info("View Custom Job:\n%s" % self._dashboard_uri())

@@ -1168,8 +1180,9 @@ def job_spec(self):
 }


-
 class HyperparameterTuningJob(_RunnableJob):
+    """AI Platform (Unified) Hyperparameter Tuning Job."""
+
     _resource_noun = "hyperparameterTuningJobs"
     _getter_method = "get_hyperparameter_tuning_job"
     _list_method = "list_hyperparameter_tuning_jobs"
@@ -1193,6 +1206,86 @@ def __init__(
         credentials: Optional[auth_credentials.Credentials] = None,
         encryption_spec_key_name: Optional[str] = None,
     ):
+        """
+        Configures a Hyperparameter Tuning Job.
+
+        Args:
+            display_name (str):
+                Required. The user-defined name of the HyperparameterTuningJob.
+                The name can be up to 128 characters long and can consist
+                of any UTF-8 characters.
+            custom_job (aiplatform.CustomJob):
+                Required. Configured CustomJob. The worker pool spec from this custom job
+                applies to the CustomJobs created in all the trials.
+            metric_spec (Dict[str, str]):
+                Required. Dictionary representing metrics to optimize. The dictionary key is the metric_id,
+                which is reported by your training job, and the dictionary value is the
+                optimization goal of the metric ('minimize' or 'maximize'). Example:
+
+                metric_spec = {'loss': 'minimize', 'accuracy': 'maximize'}
+
+            parameter_spec (Dict[str, hyperparameter_tuning._ParameterSpec]):
+                Required. Dictionary representing parameters to optimize. The dictionary key is the parameter_id,
+                which is passed to your training job as a command-line keyword argument, and the
+                dictionary value is the parameter specification of the metric.
+
+
+                from google.cloud.aiplatform import hyperparameter_tuning as hpt
+
+                parameter_spec={
+                    'decay': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'),
+                    'learning_rate': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'),
+                    'batch_size': hpt.DiscreteParameterSpec(values=[4, 8, 16, 32, 64, 128], scale='linear')
+                }
+
+                Supported parameter specifications can be found in aiplatform.hyperparameter_tuning.
+                These parameter specifications are currently supported:
+                DoubleParameterSpec, IntegerParameterSpec, CategoricalParameterSpec, DiscreteParameterSpec
+
+            max_trial_count (int):
+                Required. The desired total number of Trials.
+            parallel_trial_count (int):
+                Required. The desired number of Trials to run in parallel.
+            max_failed_trial_count (int):
+                Optional. The number of failed Trials that need to be
+                seen before failing the HyperparameterTuningJob.
+                If set to 0, AI Platform decides how many Trials
+                must fail before the whole job fails.
+            search_algorithm (str):
+                The search algorithm specified for the Study.
+ + Accepts: 'random', 'grid' + measurement_selection (str): + This indicates which measurement to use if/when the service + automatically selects the final measurement from previously reported + intermediate measurements. + + Accepts: 'best', 'last' + + Choose this based on two considerations: + A) Do you expect your measurements to monotonically improve? If so, + choose 'last'. On the other hand, if you're in a situation + where your system can "over-train" and you expect the performance to + get better for a while but then start declining, choose + 'best'. B) Are your measurements significantly noisy + and/or irreproducible? If so, 'best' will tend to be + over-optimistic, and it may be better to choose 'last'. If + both or neither of (A) and (B) apply, it doesn't matter which + selection type is chosen. + project (str): + Optional. Project to run the HyperparameterTuningjob in. Overrides project set in aiplatform.init. + location (str): + Optional. Location to run the HyperparameterTuning in. Overrides location set in aiplatform.init. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to run call HyperparameterTuning service. Overrides + credentials set in aiplatform.init. + encryption_spec_key_name (str): + Optional. Customer-managed encryption key options for a + HyperparameterTuningJob. If this is set, then + all resources created by the + HyperparameterTuningJob will be encrypted with + the provided encryption key. + """ super().__init__(project=project, location=location, credentials=credentials) metrics = [ @@ -1237,6 +1330,28 @@ def run( restart_job_on_worker_restart: bool = False, sync: bool = True, ) -> None: + """Run this configured CustomJob. + + Args: + service_account (str): + Optional. Specifies the service account for workload run-as account. + Users submitting jobs must have act-as permission on this run-as account. + network (str): + Optional. The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Private services access must already be configured for the network. + If left unspecified, the job is not peered with any network. + timeout (int): + The maximum job running time in seconds. The default is 7 days. + restart_job_on_worker_restart (bool): + Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by + distributed training jobs that are not resilient + to workers leaving and joining a job. + sync (bool): + Whether to execute this method synchronously. If False, this method + will unblock and it will be executed in a concurrent Future. + """ if service_account: self._gca_resource.trial_job_spec.service_account = service_account @@ -1257,7 +1372,9 @@ def run( parent=self._parent, hyperparameter_tuning_job=self._gca_resource ) - _LOGGER.log_create_complete(self.__class__, self._gca_resource, "hpt_job") + _LOGGER.log_create_complete_with_getter( + self.__class__, self._gca_resource, "hpt_job" + ) _LOGGER.info("View HyperparameterTuningJob:\n%s" % self._dashboard_uri()) diff --git a/google/cloud/aiplatform/utils/source_utils.py b/google/cloud/aiplatform/utils/source_utils.py index f84e37b52a..b7fcef806f 100644 --- a/google/cloud/aiplatform/utils/source_utils.py +++ b/google/cloud/aiplatform/utils/source_utils.py @@ -1,3 +1,20 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + import functools import pathlib import shutil diff --git a/google/cloud/aiplatform/utils/worker_spec_utils.py b/google/cloud/aiplatform/utils/worker_spec_utils.py index ba81352d73..9a681d3b98 100644 --- a/google/cloud/aiplatform/utils/worker_spec_utils.py +++ b/google/cloud/aiplatform/utils/worker_spec_utils.py @@ -1,3 +1,19 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + from typing import NamedTuple, Optional, Dict, Union, List from google.cloud.aiplatform import utils diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py new file mode 100644 index 0000000000..3d5e9e510b --- /dev/null +++ b/tests/unit/aiplatform/test_custom_job.py @@ -0,0 +1,323 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
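The expected job proto in the tests below is built with the same `Duration` conversion that `run()` performs on its integer `timeout` argument; a standalone sketch of that conversion (values are arbitrary):

```python
from google.protobuf import duration_pb2  # type: ignore

from google.cloud.aiplatform.compat.types import custom_job as gca_custom_job_compat

timeout = 8000  # seconds, as passed to CustomJob.run()
scheduling = gca_custom_job_compat.Scheduling(
    timeout=duration_pb2.Duration(seconds=timeout) if timeout else None,
    restart_job_on_worker_restart=True,
)
```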
+# + +import pytest + +import copy +from importlib import reload +from unittest import mock +from unittest.mock import patch + +from google.protobuf import duration_pb2 # type: ignore +from google.rpc import status_pb2 + +import test_training_jobs +from test_training_jobs import mock_python_package_to_gcs + +from google.cloud import aiplatform +from google.cloud.aiplatform.compat.types import custom_job as gca_custom_job_compat +from google.cloud.aiplatform.compat.types import io as gca_io_compat +from google.cloud.aiplatform.compat.types import job_state as gca_job_state_compat +from google.cloud.aiplatform.compat.types import ( + encryption_spec as gca_encryption_spec_compat, +) +from google.cloud.aiplatform_v1.services.job_service import client as job_service_client + +_TEST_PROJECT = "test-project" +_TEST_LOCATION = "us-central1" +_TEST_ID = "1028944691210842416" +_TEST_DISPLAY_NAME = "my_job_1234" + +_TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" + +_TEST_CUSTOM_JOB_NAME = f"{_TEST_PARENT}/customJobs/{_TEST_ID}" + +_TEST_TRAINING_CONTAINER_IMAGE = "gcr.io/test-training/container:image" + +_TEST_WORKER_POOL_SPEC = [ + { + "machine_spec": { + "machine_type": "n1-standard-4", + "accelerator_type": "NVIDIA_TESLA_K80", + "accelerator_count": 1, + }, + "replica_count": 1, + "container_spec": { + "image_uri": _TEST_TRAINING_CONTAINER_IMAGE, + "command": [], + "args": [], + }, + } +] + +_TEST_STAGING_BUCKET = "gs://test-staging-bucket" + +# CMEK encryption +_TEST_DEFAULT_ENCRYPTION_KEY_NAME = "key_default" +_TEST_DEFAULT_ENCRYPTION_SPEC = gca_encryption_spec_compat.EncryptionSpec( + kms_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME +) + +_TEST_SERVICE_ACCOUNT = "vinnys@my-project.iam.gserviceaccount.com" + + +_TEST_NETWORK = f"projects/{_TEST_PROJECT}/global/networks/{_TEST_ID}" + +_TEST_TIMEOUT = 8000 +_TEST_RESTART_JOB_ON_WORKER_RESTART = True + +_TEST_BASE_CUSTOM_JOB_PROTO = gca_custom_job_compat.CustomJob( + display_name=_TEST_DISPLAY_NAME, + job_spec=gca_custom_job_compat.CustomJobSpec( + worker_pool_specs=_TEST_WORKER_POOL_SPEC, + base_output_directory=gca_io_compat.GcsDestination( + output_uri_prefix=_TEST_STAGING_BUCKET + ), + scheduling=gca_custom_job_compat.Scheduling( + timeout=duration_pb2.Duration(seconds=_TEST_TIMEOUT), + restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART, + ), + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + ), + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, +) + + +def _get_custom_job_proto(state=None, name=None, error=None): + custom_job_proto = copy.deepcopy(_TEST_BASE_CUSTOM_JOB_PROTO) + custom_job_proto.name = name + custom_job_proto.state = state + custom_job_proto.error = error + return custom_job_proto + + +@pytest.fixture +def get_custom_job_mock(): + with patch.object( + job_service_client.JobServiceClient, "get_custom_job" + ) as get_custom_job_mock: + get_custom_job_mock.side_effect = [ + _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_PENDING, + ), + _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_RUNNING, + ), + _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED, + ), + ] + yield get_custom_job_mock + + +@pytest.fixture +def get_custom_job_mock_with_fail(): + with patch.object( + job_service_client.JobServiceClient, "get_custom_job" + ) as get_custom_job_mock: + get_custom_job_mock.side_effect = [ + _get_custom_job_proto( + 
name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_PENDING, + ), + _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_RUNNING, + ), + _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_FAILED, + error=status_pb2.Status(message="Test Error"), + ), + ] + yield get_custom_job_mock + + +@pytest.fixture +def create_custom_job_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "create_custom_job" + ) as create_custom_job_mock: + create_custom_job_mock.return_value = _get_custom_job_proto( + name=_TEST_CUSTOM_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_PENDING, + ) + yield create_custom_job_mock + + +class TestCustomJob: + def setup_method(self): + reload(aiplatform.initializer) + reload(aiplatform) + + def teardown_method(self): + aiplatform.initializer.global_pool.shutdown(wait=True) + + @pytest.mark.parametrize("sync", [True, False]) + def test_create_custom_job(self, create_custom_job_mock, get_custom_job_mock, sync): + + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = aiplatform.CustomJob( + display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + ) + + job.run( + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + timeout=_TEST_TIMEOUT, + restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART, + sync=sync, + ) + + job.wait() + + expected_custom_job = _get_custom_job_proto() + + create_custom_job_mock.assert_called_once_with( + parent=_TEST_PARENT, custom_job=expected_custom_job + ) + + assert job.job_spec == expected_custom_job.job_spec + assert ( + job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_run_custom_job_with_fail_raises( + self, create_custom_job_mock, get_custom_job_mock_with_fail, sync + ): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = aiplatform.CustomJob( + display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + ) + + with pytest.raises(RuntimeError): + job.run( + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + timeout=_TEST_TIMEOUT, + restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART, + sync=sync, + ) + + job.wait() + + expected_custom_job = _get_custom_job_proto() + + create_custom_job_mock.assert_called_once_with( + parent=_TEST_PARENT, custom_job=expected_custom_job + ) + + assert job.job_spec == expected_custom_job.job_spec + assert job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_FAILED + + def test_custom_job_get_state_raises_without_run(self): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = aiplatform.CustomJob( + display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + ) + + with pytest.raises(RuntimeError): + print(job.state) + + def test_no_staging_bucket_raises(self): + + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + with pytest.raises(RuntimeError): + job = aiplatform.CustomJob( + display_name=_TEST_DISPLAY_NAME, + 
worker_pool_specs=_TEST_WORKER_POOL_SPEC, + ) + + def test_get_custom_job(self, get_custom_job_mock): + + job = aiplatform.CustomJob.get(_TEST_CUSTOM_JOB_NAME) + + get_custom_job_mock.assert_called_once_with(name=_TEST_CUSTOM_JOB_NAME) + assert ( + job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_PENDING + ) + assert job.job_spec == _TEST_BASE_CUSTOM_JOB_PROTO.job_spec + + @pytest.mark.usefixtures("mock_python_package_to_gcs") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_from_local_script( + self, get_custom_job_mock, create_custom_job_mock, sync + ): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + # configuration on this is tested in test_training_jobs.py + job = aiplatform.CustomJob.from_local_script( + display_name=_TEST_DISPLAY_NAME, + script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME, + container_uri=_TEST_TRAINING_CONTAINER_IMAGE, + ) + + job.run(sync=sync) + + job.wait() + + assert ( + job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED + ) + + @pytest.mark.usefixtures("mock_python_package_to_gcs") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_from_local_script_raises_with_no_staging_bucket( + self, get_custom_job_mock, create_custom_job_mock, sync + ): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + with pytest.raises(RuntimeError): + + # configuration on this is tested in test_training_jobs.py + job = aiplatform.CustomJob.from_local_script( + display_name=_TEST_DISPLAY_NAME, + script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME, + container_uri=_TEST_TRAINING_CONTAINER_IMAGE, + ) diff --git a/tests/unit/aiplatform/test_hyperparametertuning_job.py b/tests/unit/aiplatform/test_hyperparametertuning_job.py new file mode 100644 index 0000000000..d406e07efa --- /dev/null +++ b/tests/unit/aiplatform/test_hyperparametertuning_job.py @@ -0,0 +1,369 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
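The expected `StudySpec` in the tests below is what the `hpt` parameter specs serialize to. A small sketch of that mapping via the internal `_to_parameter_spec` helper shown earlier (an internal API, used here purely for illustration; the `min_value` attribute name is assumed from the expected proto):

```python
from google.cloud.aiplatform import hyperparameter_tuning as hpt
from google.cloud.aiplatform.compat.types import study as gca_study_compat

lr = hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log")
proto_spec = lr._to_parameter_spec(parameter_id="lr")

# "log" maps to UNIT_LOG_SCALE through _SCALE_TYPE_MAP.
assert (
    proto_spec.scale_type
    == gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LOG_SCALE
)
assert proto_spec.double_value_spec.min_value == 0.001
```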
+# + +import pytest + +import copy +from importlib import reload +from unittest import mock +from unittest.mock import patch + +from google.protobuf import duration_pb2 # type: ignore +from google.rpc import status_pb2 + +from google.cloud import aiplatform +from google.cloud.aiplatform import hyperparameter_tuning as hpt +from google.cloud.aiplatform.compat.types import job_state as gca_job_state_compat +from google.cloud.aiplatform.compat.types import ( + encryption_spec as gca_encryption_spec_compat, +) +from google.cloud.aiplatform.compat.types import ( + hyperparameter_tuning_job as gca_hyperparameter_tuning_job_compat, +) +from google.cloud.aiplatform.compat.types import study as gca_study_compat +from google.cloud.aiplatform_v1.services.job_service import client as job_service_client + +import test_custom_job + +_TEST_PROJECT = "test-project" +_TEST_LOCATION = "us-central1" +_TEST_ID = "1028944691210842416" +_TEST_DISPLAY_NAME = "my_hp_job_1234" + +_TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" + +_TEST_STAGING_BUCKET = test_custom_job._TEST_STAGING_BUCKET + +_TEST_HYPERPARAMETERTUNING_JOB_NAME = ( + f"{_TEST_PARENT}/hyperparameterTuningJobs/{_TEST_ID}" +) + +# CMEK encryption +_TEST_DEFAULT_ENCRYPTION_KEY_NAME = "key_default" +_TEST_DEFAULT_ENCRYPTION_SPEC = gca_encryption_spec_compat.EncryptionSpec( + kms_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME +) + +_TEST_SERVICE_ACCOUNT = "vinnys@my-project.iam.gserviceaccount.com" + + +_TEST_NETWORK = f"projects/{_TEST_PROJECT}/global/networks/{_TEST_ID}" + +_TEST_TIMEOUT = 8000 +_TEST_RESTART_JOB_ON_WORKER_RESTART = True + +_TEST_METRIC_SPEC_KEY = "test-metric" +_TEST_METRIC_SPEC_VALUE = "maximize" + +_TEST_PARALLEL_TRIAL_COUNT = 8 +_TEST_MAX_TRIAL_COUNT = 64 +_TEST_MAX_FAILED_TRIAL_COUNT = 4 +_TEST_SEARCH_ALGORITHM = "random" +_TEST_MEASUREMENT_SELECTION = "best" + + +_TEST_BASE_HYPERPARAMETER_TUNING_JOB_PROTO = gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob( + display_name=_TEST_DISPLAY_NAME, + study_spec=gca_study_compat.StudySpec( + metrics=[ + gca_study_compat.StudySpec.MetricSpec( + metric_id=_TEST_METRIC_SPEC_KEY, goal=_TEST_METRIC_SPEC_VALUE.upper() + ) + ], + parameters=[ + gca_study_compat.StudySpec.ParameterSpec( + parameter_id="lr", + scale_type=gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LOG_SCALE, + double_value_spec=gca_study_compat.StudySpec.ParameterSpec.DoubleValueSpec( + min_value=0.001, max_value=0.1 + ), + ), + gca_study_compat.StudySpec.ParameterSpec( + parameter_id="units", + scale_type=gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE, + integer_value_spec=gca_study_compat.StudySpec.ParameterSpec.IntegerValueSpec( + min_value=4, max_value=1028 + ), + ), + gca_study_compat.StudySpec.ParameterSpec( + parameter_id="activation", + categorical_value_spec=gca_study_compat.StudySpec.ParameterSpec.CategoricalValueSpec( + values=["relu", "sigmoid", "elu", "selu", "tanh"] + ), + ), + gca_study_compat.StudySpec.ParameterSpec( + parameter_id="batch_size", + scale_type=gca_study_compat.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE, + discrete_value_spec=gca_study_compat.StudySpec.ParameterSpec.DiscreteValueSpec( + values=[16, 32] + ), + ), + ], + algorithm=gca_study_compat.StudySpec.Algorithm.RANDOM_SEARCH, + measurement_selection_type=gca_study_compat.StudySpec.MeasurementSelectionType.BEST_MEASUREMENT, + ), + parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT, + max_trial_count=_TEST_MAX_TRIAL_COUNT, + max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, + 
trial_job_spec=test_custom_job._TEST_BASE_CUSTOM_JOB_PROTO.job_spec, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, +) + + +def _get_hyperparameter_tuning_job_proto(state=None, name=None, error=None): + custom_job_proto = copy.deepcopy(_TEST_BASE_HYPERPARAMETER_TUNING_JOB_PROTO) + custom_job_proto.name = name + custom_job_proto.state = state + custom_job_proto.error = error + return custom_job_proto + + +@pytest.fixture +def get_hyperparameter_tuning_job_mock(): + with patch.object( + job_service_client.JobServiceClient, "get_hyperparameter_tuning_job" + ) as get_hyperparameter_tuning_job_mock: + get_hyperparameter_tuning_job_mock.side_effect = [ + _get_hyperparameter_tuning_job_proto( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_PENDING, + ), + _get_hyperparameter_tuning_job_proto( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_RUNNING, + ), + _get_hyperparameter_tuning_job_proto( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED, + ), + ] + yield get_hyperparameter_tuning_job_mock + + +@pytest.fixture +def get_hyperparameter_tuning_job_mock_with_fail(): + with patch.object( + job_service_client.JobServiceClient, "get_hyperparameter_tuning_job" + ) as get_hyperparameter_tuning_job_mock: + get_hyperparameter_tuning_job_mock.side_effect = [ + _get_hyperparameter_tuning_job_proto( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_PENDING, + ), + _get_hyperparameter_tuning_job_proto( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_RUNNING, + ), + _get_hyperparameter_tuning_job_proto( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_FAILED, + error=status_pb2.Status(message="Test Error"), + ), + ] + yield get_hyperparameter_tuning_job_mock + + +@pytest.fixture +def create_hyperparameter_tuning_job_mock(): + with mock.patch.object( + job_service_client.JobServiceClient, "create_hyperparameter_tuning_job" + ) as create_hyperparameter_tuning_job_mock: + create_hyperparameter_tuning_job_mock.return_value = _get_hyperparameter_tuning_job_proto( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME, + state=gca_job_state_compat.JobState.JOB_STATE_PENDING, + ) + yield create_hyperparameter_tuning_job_mock + + +class TestCustomJob: + def setup_method(self): + reload(aiplatform.initializer) + reload(aiplatform) + + def teardown_method(self): + aiplatform.initializer.global_pool.shutdown(wait=True) + + @pytest.mark.parametrize("sync", [True, False]) + def test_create_hyperparameter_tuning_job( + self, + create_hyperparameter_tuning_job_mock, + get_hyperparameter_tuning_job_mock, + sync, + ): + + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + custom_job = aiplatform.CustomJob( + display_name=test_custom_job._TEST_DISPLAY_NAME, + worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + ) + + job = aiplatform.HyperparameterTuningJob( + display_name=_TEST_DISPLAY_NAME, + custom_job=custom_job, + metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE}, + parameter_spec={ + "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"), + "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"), + "activation": hpt.CategoricalParameterSpec( + values=["relu", "sigmoid", "elu", "selu", "tanh"] + ), + 
"batch_size": hpt.DiscreteParameterSpec( + values=[16, 32], scale="linear" + ), + }, + parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT, + max_trial_count=_TEST_MAX_TRIAL_COUNT, + max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, + search_algorithm=_TEST_SEARCH_ALGORITHM, + measurement_selection=_TEST_MEASUREMENT_SELECTION, + ) + + job.run( + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + timeout=_TEST_TIMEOUT, + restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART, + sync=sync, + ) + + job.wait() + + expected_hyperparameter_tuning_job = _get_hyperparameter_tuning_job_proto() + + create_hyperparameter_tuning_job_mock.assert_called_once_with( + parent=_TEST_PARENT, + hyperparameter_tuning_job=expected_hyperparameter_tuning_job, + ) + + assert ( + job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_run_hyperparameter_tuning_job_with_fail_raises( + self, + create_hyperparameter_tuning_job_mock, + get_hyperparameter_tuning_job_mock_with_fail, + sync, + ): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + custom_job = aiplatform.CustomJob( + display_name=test_custom_job._TEST_DISPLAY_NAME, + worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + ) + + job = aiplatform.HyperparameterTuningJob( + display_name=_TEST_DISPLAY_NAME, + custom_job=custom_job, + metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE}, + parameter_spec={ + "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"), + "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"), + "activation": hpt.CategoricalParameterSpec( + values=["relu", "sigmoid", "elu", "selu", "tanh"] + ), + "batch_size": hpt.DiscreteParameterSpec( + values=[16, 32], scale="linear" + ), + }, + parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT, + max_trial_count=_TEST_MAX_TRIAL_COUNT, + max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, + search_algorithm=_TEST_SEARCH_ALGORITHM, + measurement_selection=_TEST_MEASUREMENT_SELECTION, + ) + + with pytest.raises(RuntimeError): + job.run( + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, + timeout=_TEST_TIMEOUT, + restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART, + sync=sync, + ) + + job.wait() + + expected_hyperparameter_tuning_job = _get_hyperparameter_tuning_job_proto() + + create_hyperparameter_tuning_job_mock.assert_called_once_with( + parent=_TEST_PARENT, + hyperparameter_tuning_job=expected_hyperparameter_tuning_job, + ) + + assert job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_FAILED + + def test_hyperparameter_tuning_job_get_state_raises_without_run(self): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + custom_job = aiplatform.CustomJob( + display_name=test_custom_job._TEST_DISPLAY_NAME, + worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + ) + + job = aiplatform.HyperparameterTuningJob( + display_name=_TEST_DISPLAY_NAME, + custom_job=custom_job, + metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE}, + parameter_spec={ + "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"), + "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"), + "activation": hpt.CategoricalParameterSpec( + values=["relu", "sigmoid", 
"elu", "selu", "tanh"] + ), + "batch_size": hpt.DiscreteParameterSpec( + values=[16, 32, 64], scale="linear" + ), + }, + parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT, + max_trial_count=_TEST_MAX_TRIAL_COUNT, + max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, + search_algorithm=_TEST_SEARCH_ALGORITHM, + measurement_selection=_TEST_MEASUREMENT_SELECTION, + ) + + with pytest.raises(RuntimeError): + print(job.state) + + def test_get_hyperparameter_tuning_job(self, get_hyperparameter_tuning_job_mock): + + job = aiplatform.HyperparameterTuningJob.get( + _TEST_HYPERPARAMETERTUNING_JOB_NAME + ) + + get_hyperparameter_tuning_job_mock.assert_called_once_with( + name=_TEST_HYPERPARAMETERTUNING_JOB_NAME + ) + assert ( + job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_PENDING + ) From bf9452b662f93390735f0878b083d2384afec02b Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Mon, 17 May 2021 16:10:03 -0400 Subject: [PATCH 14/29] chore: remove training utils. We will re-evaluate whether to add these as a separate package --- google/cloud/aiplatform/__init__.py | 2 - google/cloud/aiplatform/training_utils.py | 187 ------------------- tests/unit/aiplatform/test_training_utils.py | 144 -------------- 3 files changed, 333 deletions(-) delete mode 100644 google/cloud/aiplatform/training_utils.py delete mode 100644 tests/unit/aiplatform/test_training_utils.py diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index d544b4bc8f..6aa8f64161 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -45,7 +45,6 @@ AutoMLTextTrainingJob, AutoMLVideoTrainingJob, ) -from google.cloud.aiplatform import training_utils """ Usage: @@ -72,7 +71,6 @@ "get_experiment_df", "get_pipeline_df", "start_run", - "training_utils", "AutoMLImageTrainingJob", "AutoMLTabularTrainingJob", "AutoMLForecastingTrainingJob", diff --git a/google/cloud/aiplatform/training_utils.py b/google/cloud/aiplatform/training_utils.py deleted file mode 100644 index 95e4d2429a..0000000000 --- a/google/cloud/aiplatform/training_utils.py +++ /dev/null @@ -1,187 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import collections -import json -import os -import time -from typing import Dict, Optional - - -class EnvironmentVariables: - """Passes on OS' environment variables.""" - - @property - def training_data_uri(self) -> Optional[str]: - """ - Returns: - Cloud Storage URI of a directory intended for training data. None if - environment variable not set. - """ - return os.environ.get("AIP_TRAINING_DATA_URI") - - @property - def validation_data_uri(self) -> Optional[str]: - """ - Returns: - Cloud Storage URI of a directory intended for validation data. None - if environment variable not set. - """ - return os.environ.get("AIP_VALIDATION_DATA_URI") - - @property - def test_data_uri(self) -> Optional[str]: - """ - Returns: - Cloud Storage URI of a directory intended for test data. 
None if - environment variable not set. - """ - return os.environ.get("AIP_TEST_DATA_URI") - - @property - def model_dir(self) -> Optional[str]: - """ - Returns: - Cloud Storage URI of a directory intended for saving model artefacts. - None if environment variable not set. - """ - return os.environ.get("AIP_MODEL_DIR") - - @property - def checkpoint_dir(self) -> Optional[str]: - """ - Returns: - Cloud Storage URI of a directory intended for saving checkpoints. - None if environment variable not set. - """ - return os.environ.get("AIP_CHECKPOINT_DIR") - - @property - def tensorboard_log_dir(self) -> Optional[str]: - """ - Returns: - Cloud Storage URI of a directory intended for saving TensorBoard logs. - None if environment variable not set. - """ - return os.environ.get("AIP_TENSORBOARD_LOG_DIR") - - @property - def cluster_spec(self) -> Optional[Dict]: - """ - Returns: - json string as described in https://cloud.google.com/ai-platform-unified/docs/training/distributed-training#cluster-variables - None if environment variable not set. - """ - cluster_spec_env = os.environ.get("CLUSTER_SPEC") - if cluster_spec_env is not None: - return json.loads(cluster_spec_env) - else: - return None - - @property - def tf_config(self) -> Optional[Dict]: - """ - Returns: - json string as described in https://cloud.google.com/ai-platform-unified/docs/training/distributed-training#tf-config - None if environment variable not set. - """ - tf_config_env = os.environ.get("TF_CONFIG") - if tf_config_env is not None: - return json.loads(tf_config_env) - else: - return None - - -_DEFAULT_HYPERPARAMETER_METRIC_TAG = "training/hptuning/metric" -_DEFAULT_METRIC_PATH = "/tmp/hypertune/output.metrics" -# TODO(0olwzo0): consider to make it configurable -_MAX_NUM_METRIC_ENTRIES_TO_PRESERVE = 100 - - -class _HyperparameterTuningJobReporterSingleton: - """Main class for HyperTune.""" - - initialized = False - - @classmethod - def initialize(cls): - if cls.initialized: - return - - cls.metric_path = os.environ.get( - "CLOUD_ML_HP_METRIC_FILE", _DEFAULT_METRIC_PATH - ) - if not os.path.exists(os.path.dirname(cls.metric_path)): - os.makedirs(os.path.dirname(cls.metric_path)) - - cls.trial_id = os.environ.get("CLOUD_ML_TRIAL_ID", 0) - cls.metrics_queue = collections.deque( - maxlen=_MAX_NUM_METRIC_ENTRIES_TO_PRESERVE - ) - - cls.initialized = True - - @classmethod - def _dump_metrics_to_file(cls): - with open(cls.metric_path, "w") as metric_file: - for metric in cls.metrics_queue: - metric_file.write(json.dumps(metric, sort_keys=True) + "\n") - - @classmethod - def report_hyperparameter_tuning_metric( - cls, - hyperparameter_metric_tag, - metric_value, - global_step=None, - checkpoint_path="", - ): - """Method to report hyperparameter tuning metric. - Args: - hyperparameter_metric_tag: The hyperparameter metric name this metric - value is associated with. Should keep consistent with the tag - specified in HyperparameterSpec. - metric_value: float, the values for the hyperparameter metric to report. - global_step: int, the global step this metric value is associated with. - checkpoint_path: The checkpoint path which can be used to warmstart from. 
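With `training_utils` removed, code that relied on the metric reporter can write the same JSON-lines format inline. A rough equivalent of what the deleted singleton produced (simplified to append records rather than keep a bounded queue):

```python
import json
import os
import time

def report_metric(tag: str, value: float, global_step: int = 0) -> None:
    # Same file path and record keys as the removed reporter.
    path = os.environ.get("CLOUD_ML_HP_METRIC_FILE", "/tmp/hypertune/output.metrics")
    os.makedirs(os.path.dirname(path), exist_ok=True)
    record = {
        "timestamp": time.time(),
        "trial": str(os.environ.get("CLOUD_ML_TRIAL_ID", 0)),
        tag: str(float(value)),
        "global_step": str(global_step),
        "checkpoint_path": "",
    }
    with open(path, "a") as metric_file:
        metric_file.write(json.dumps(record, sort_keys=True) + "\n")
```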
- """ - metric_value = float(metric_value) - metric_tag = _DEFAULT_HYPERPARAMETER_METRIC_TAG - if hyperparameter_metric_tag: - metric_tag = hyperparameter_metric_tag - metric_body = { - "timestamp": time.time(), - "trial": str(cls.trial_id), - metric_tag: str(metric_value), - "global_step": str(int(global_step) if global_step else 0), - "checkpoint_path": checkpoint_path, - } - cls.metrics_queue.append(metric_body) - cls._dump_metrics_to_file() - - -def report_hyperparameter_tuning_metrics( - metrics: Dict[str, float], global_step: Optional[int] = None, checkpoint_path="" -): - _HyperparameterTuningJobReporterSingleton.initialize() - - for hyperparameter_metric_tag, metric_value in metrics.items(): - _HyperparameterTuningJobReporterSingleton.report_hyperparameter_tuning_metric( - hyperparameter_metric_tag=hyperparameter_metric_tag, - metric_value=metric_value, - global_step=global_step, - checkpoint_path=checkpoint_path, - ) diff --git a/tests/unit/aiplatform/test_training_utils.py b/tests/unit/aiplatform/test_training_utils.py deleted file mode 100644 index 1d4b839151..0000000000 --- a/tests/unit/aiplatform/test_training_utils.py +++ /dev/null @@ -1,144 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
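Likewise, the `EnvironmentVariables` accessors exercised by the tests removed below reduce to direct environment lookups; a minimal sketch of the replacement pattern:

```python
import json
import os

# The same AIP_* variables the removed EnvironmentVariables class wrapped.
model_dir = os.environ.get("AIP_MODEL_DIR")
checkpoint_dir = os.environ.get("AIP_CHECKPOINT_DIR")

cluster_spec_env = os.environ.get("CLUSTER_SPEC")
cluster_spec = json.loads(cluster_spec_env) if cluster_spec_env else None
```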
-#
-
-import json
-import os
-import pytest
-
-from google.cloud.aiplatform import training_utils
-from unittest import mock
-
-_TEST_TRAINING_DATA_URI = "gs://training-data-uri"
-_TEST_VALIDATION_DATA_URI = "gs://test-validation-data-uri"
-_TEST_TEST_DATA_URI = "gs://test-data-uri"
-_TEST_MODEL_DIR = "gs://test-model-dir"
-_TEST_CHECKPOINT_DIR = "gs://test-checkpoint-dir"
-_TEST_TENSORBOARD_LOG_DIR = "gs://test-tensorboard-log-dir"
-_TEST_CLUSTER_SPEC = """{
-    "cluster": {
-        "worker_pools":[
-            {
-                "index":0,
-                "replicas":[
-                    "training-workerpool0-ab-0:2222"
-                ]
-            },
-            {
-                "index":1,
-                "replicas":[
-                    "training-workerpool1-ab-0:2222",
-                    "training-workerpool1-ab-1:2222"
-                ]
-            }
-        ]
-    },
-    "environment": "cloud",
-    "task": {
-        "worker_pool_index":0,
-        "replica_index":0,
-        "trial":"TRIAL_ID"
-    }
-}"""
-
-
-class TestTrainingUtils:
-    @pytest.fixture
-    def mock_environment(self):
-        env_vars = {
-            "AIP_TRAINING_DATA_URI": _TEST_TRAINING_DATA_URI,
-            "AIP_VALIDATION_DATA_URI": _TEST_VALIDATION_DATA_URI,
-            "AIP_TEST_DATA_URI": _TEST_TEST_DATA_URI,
-            "AIP_MODEL_DIR": _TEST_MODEL_DIR,
-            "AIP_CHECKPOINT_DIR": _TEST_CHECKPOINT_DIR,
-            "AIP_TENSORBOARD_LOG_DIR": _TEST_TENSORBOARD_LOG_DIR,
-            "CLUSTER_SPEC": _TEST_CLUSTER_SPEC,
-            "TF_CONFIG": _TEST_CLUSTER_SPEC,
-        }
-        with mock.patch.dict(os.environ, env_vars):
-            yield
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_training_data_uri(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.training_data_uri == _TEST_TRAINING_DATA_URI
-
-    def test_training_data_uri_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.training_data_uri is None
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_validation_data_uri(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.validation_data_uri == _TEST_VALIDATION_DATA_URI
-
-    def test_validation_data_uri_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.validation_data_uri is None
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_test_data_uri(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.test_data_uri == _TEST_TEST_DATA_URI
-
-    def test_test_data_uri_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.test_data_uri is None
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_model_dir(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.model_dir == _TEST_MODEL_DIR
-
-    def test_model_dir_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.model_dir is None
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_checkpoint_dir(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.checkpoint_dir == _TEST_CHECKPOINT_DIR
-
-    def test_checkpoint_dir_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.checkpoint_dir is None
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_tensorboard_log_dir(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.tensorboard_log_dir == _TEST_TENSORBOARD_LOG_DIR
-
-    def test_tensorboard_log_dir_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.tensorboard_log_dir is None
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_cluster_spec(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.cluster_spec == json.loads(_TEST_CLUSTER_SPEC)
-
-    def test_cluster_spec_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.cluster_spec is None
-
-    @pytest.mark.usefixtures("mock_environment")
-    def test_tf_config(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.tf_config == json.loads(_TEST_CLUSTER_SPEC)
-
-    def test_tf_config_none(self):
-        env_vars = training_utils.EnvironmentVariables()
-        assert env_vars.tf_config is None

From 78a29f4911d3da22e2451e9855ba5bf0e42184fa Mon Sep 17 00:00:00 2001
From: Sasha Sobran
Date: Mon, 17 May 2021 16:16:59 -0400
Subject: [PATCH 15/29] chore: add additional documentation

---
 google/cloud/aiplatform/jobs.py | 54 +++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 9e36189877..5a72596177 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -901,6 +901,7 @@ def __init__(
     ):
         """Construct a Custom Job with Worker Pool Specs.
 
+        ```
         Example usage:
         worker_pool_specs = [
                 {
                     "machine_spec": {
                         "machine_type": "n1-standard-4",
                         "accelerator_type": "NVIDIA_TESLA_K80",
                         "accelerator_count": 1,
                     },
                     "replica_count": 1,
                     "container_spec": {
                         "image_uri": container_image_uri,
                         "command": [],
                         "args": [],
                     },
                 }
             ]
 
         my_job = aiplatform.CustomJob(
             display_name='my_job',
             worker_pool_specs=worker_pool_specs
         )
 
         my_job.run()
+        ```
 
         For more information on configuring worker pool specs please visit:
         https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job
@@ -1209,6 +1211,58 @@ def __init__(
         """
         Configures a HyperparameterTuning Job.
 
+        Example usage:
+
+        ```
+        from google.cloud.aiplatform import hyperparamter_tuning as hpt
+
+        worker_pool_specs = [
+            {
+                "machine_spec": {
+                    "machine_type": "n1-standard-4",
+                    "accelerator_type": "NVIDIA_TESLA_K80",
+                    "accelerator_count": 1,
+                },
+                "replica_count": 1,
+                "container_spec": {
+                    "image_uri": container_image_uri,
+                    "command": [],
+                    "args": [],
+                },
+            }
+        ]
+
+        custom_job = aiplatform.CustomJob(
+            display_name='my_job',
+            worker_pool_specs=worker_pool_specs
+        )
+
+
+        hp_job = aiplatform.HyperparameterTuningJob(
+            display_name='hp-test',
+            custom_job=custom_job,
+            metric_spec={
+                'loss': 'minimize',
+            },
+            parameter_spec={
+                'lr': hpt.DoubleParameterSpec(min=0.001, max=0.1, scale='log'),
+                'units': hpt.IntegerParameterSpec(min=4, max=128, scale='linear'),
+                'activation': hpt.CategoricalParameterSpec(values=['relu', 'selu']),
+                'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear')
+            },
+            max_trial_count=128,
+            parallel_trial_count=8,
+        )
+
+        hp_job.run()
+
+        print(hp_job.trials)
+        ```
+
+
+        For more information on using hyperparameter tuning please visit:
+        https://cloud.google.com/ai-platform-unified/docs/training/using-hyperparameter-tuning
 
         Args:
             display_name (str):
                 Required. The user-defined name of the HyperparameterTuningJob.
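A note on how trials produce the metrics that the `metric_spec` documented above refers to: the reporting helper deleted earlier in this series wrote one JSON object per line to the HyperTune metrics file. Below is a minimal standalone sketch of that wire format, assuming only the conventions visible in the deleted code (`CLOUD_ML_HP_METRIC_FILE`, `CLOUD_ML_TRIAL_ID`, and the default `/tmp/hypertune/output.metrics` path); `report_metric` is an illustrative name, not an SDK function, and appending is a simplification of the deleted helper's bounded queue.

```
import json
import os
import time


def report_metric(tag: str, value: float, global_step: int = 0) -> None:
    # Resolve the metrics file the same way the deleted helper did.
    metric_path = os.environ.get(
        "CLOUD_ML_HP_METRIC_FILE", "/tmp/hypertune/output.metrics"
    )
    os.makedirs(os.path.dirname(metric_path), exist_ok=True)

    # One metric entry: all values serialized as strings, keyed by the
    # metric tag, stamped with the trial ID from the environment.
    entry = {
        "timestamp": time.time(),
        "trial": str(os.environ.get("CLOUD_ML_TRIAL_ID", 0)),
        tag: str(float(value)),
        "global_step": str(int(global_step)),
    }

    # The deleted helper kept the last 100 entries in a deque and rewrote
    # the whole file on each report; appending is the simplest equivalent
    # for a sketch.
    with open(metric_path, "a") as metric_file:
        metric_file.write(json.dumps(entry, sort_keys=True) + "\n")


report_metric("loss", 0.345, global_step=100)
```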
From 2434aaf12f240e2ceeab0df7b621bc5d1c416e11 Mon Sep 17 00:00:00 2001
From: Sasha Sobran
Date: Mon, 17 May 2021 16:20:40 -0400
Subject: [PATCH 16/29] chore: rename test

---
 ...erparametertuning_job.py => test_hyperparameter_tuning_job.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/unit/aiplatform/{test_hyperparametertuning_job.py => test_hyperparameter_tuning_job.py} (100%)

diff --git a/tests/unit/aiplatform/test_hyperparametertuning_job.py b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py
similarity index 100%
rename from tests/unit/aiplatform/test_hyperparametertuning_job.py
rename to tests/unit/aiplatform/test_hyperparameter_tuning_job.py

From 7752fc2b2a4969803b13b852cecb2deb9c11c6f7 Mon Sep 17 00:00:00 2001
From: Sasha Sobran
Date: Mon, 17 May 2021 16:26:40 -0400
Subject: [PATCH 17/29] chore: remove conditional parameter spec arguments
 from public parameters, will follow up to add them in

---
 .../cloud/aiplatform/hyperparameter_tuning.py | 45 ++++---------------
 1 file changed, 8 insertions(+), 37 deletions(-)

diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py
index 765de6f422..2c265cd4f1 100644
--- a/google/cloud/aiplatform/hyperparameter_tuning.py
+++ b/google/cloud/aiplatform/hyperparameter_tuning.py
@@ -101,12 +101,7 @@ class DoubleParameterSpec(_ParameterSpec):
     _parameter_spec_value_key = "double_value_spec"
 
     def __init__(
-        self,
-        min: float,
-        max: float,
-        scale: str,
-        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None,
+        self, min: float, max: float, scale: str,
     ):
         """
         Value specification for a parameter in ``DOUBLE`` type.
@@ -124,10 +119,7 @@ def __init__(
             Accepts: 'linear', 'log', 'reverse_log'
         """
 
-        super().__init__(
-            conditional_parameter_spec=conditional_parameter_spec,
-            parent_values=parent_values,
-        )
+        super().__init__()
 
         self.min = min
         self.max = max
@@ -143,12 +135,7 @@ class IntegerParameterSpec(_ParameterSpec):
     _parameter_spec_value_key = "integer_value_spec"
 
     def __init__(
-        self,
-        min: int,
-        max: int,
-        scale: str,
-        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None,
+        self, min: int, max: int, scale: str,
    ):
         """
         Value specification for a parameter in ``INTEGER`` type.
@@ -166,10 +153,7 @@ def __init__(
             Accepts: 'linear', 'log', 'reverse_log'
         """
 
-        super().__init__(
-            conditional_parameter_spec=conditional_parameter_spec,
-            parent_values=parent_values,
-        )
+        super().__init__()
 
         self.min = min
         self.max = max
@@ -185,10 +169,7 @@ class CategoricalParameterSpec(_ParameterSpec):
     _parameter_spec_value_key = "categorical_value_spec"
 
     def __init__(
-        self,
-        values: Sequence[str],
-        conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None,
-        parent_values: Optional[List[Union[float, int, str]]] = None,
+        self, values: Sequence[str],
     ):
         """Value specification for a parameter in ``CATEGORICAL`` type.
 
         Args:
             values (Sequence[str]):
                 Required. The list of possible categories.
""" - super().__init__( - conditional_parameter_spec=conditional_parameter_spec, - parent_values=parent_values, - ) + super().__init__() self.values = values @@ -214,11 +192,7 @@ class DiscreteParameterSpec(_ParameterSpec): _parameter_spec_value_key = "discrete_value_spec" def __init__( - self, - values: Sequence[float], - scale: str, - conditional_parameter_spec: Optional[Dict[str, "_ParameterSpec"]] = None, - parent_values: Optional[List[Union[float, int, str]]] = None, + self, values: Sequence[float], scale: str, ): """Value specification for a parameter in ``DISCRETE`` type. @@ -235,10 +209,7 @@ def __init__( Accepts: 'linear', 'log', 'reverse_log' """ - super().__init__( - conditional_parameter_spec=conditional_parameter_spec, - parent_values=parent_values, - ) + super().__init__() self.values = values self.scale = scale From aab2d9c6ffdf1ef3f1c1357cd4b78a912a07650c Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Mon, 17 May 2021 17:38:13 -0400 Subject: [PATCH 18/29] chore: lint --- .../cloud/aiplatform/hyperparameter_tuning.py | 8 +++---- google/cloud/aiplatform/jobs.py | 24 +++++++++---------- tests/unit/aiplatform/test_custom_job.py | 6 ++--- .../test_hyperparameter_tuning_job.py | 1 - 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py index 2c265cd4f1..56e9fcda4e 100644 --- a/google/cloud/aiplatform/hyperparameter_tuning.py +++ b/google/cloud/aiplatform/hyperparameter_tuning.py @@ -113,7 +113,7 @@ def __init__( max (float): Required. Inclusive maximum value of the parameter. - scale (str): + scale (str): Required. The type of scaling that should be applied to this parameter. Accepts: 'linear', 'log', 'reverse_log' @@ -147,7 +147,7 @@ def __init__( max (float): Required. Inclusive maximum value of the parameter. - scale (str): + scale (str): Required. The type of scaling that should be applied to this parameter. Accepts: 'linear', 'log', 'reverse_log' @@ -174,7 +174,7 @@ def __init__( """Value specification for a parameter in ``CATEGORICAL`` type. Args: - values (Sequence[str]): + values (Sequence[str]): Required. The list of possible categories. """ @@ -203,7 +203,7 @@ def __init__( might have possible settings of 1.5, 2.5, and 4.0. This list should not contain more than 1,000 values. - scale (str): + scale (str): Required. The type of scaling that should be applied to this parameter. Accepts: 'linear', 'log', 'reverse_log' diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 5a72596177..ab59562331 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -923,12 +923,12 @@ def __init__( display_name='my_job', worker_pool_specs=worker_pool_specs ) - + my_job.run() ``` - For more information on configuring worker pool specs please visit: + For more information on configuring worker pool specs please visit: https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job @@ -937,7 +937,7 @@ def __init__( Required. The user-defined name of the HyperparameterTuningJob. The name can be up to 128 characters long and can be consist of any UTF-8 characters. - worker_pool_specs (Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]]): + worker_pool_specs (Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]]): Required. The spec of the worker pools including machine type and Docker image. Can provided as a list of dictionaries or list of WorkerPoolSpec proto messages. 
         project (str):
@@ -1136,7 +1136,7 @@ def run(
                 to workers leaving and joining a job.
             sync (bool):
                 Whether to execute this method synchronously. If False, this method
-                will unblock and it will be executed in a concurrent Future. 
+                will unblock and it will be executed in a concurrent Future.
         """
 
         if service_account:
@@ -1236,7 +1236,7 @@ def __init__(
             display_name='my_job',
             worker_pool_specs=worker_pool_specs
        )
-        
+
 
         hp_job = aiplatform.HyperparameterTuningJob(
             display_name='hp-test',
@@ -1251,7 +1251,7 @@ def __init__(
                 'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear')
             },
             max_trial_count=128,
-            parallel_trial_count=8, 
+            parallel_trial_count=8,
         )
 
         hp_job.run()
@@ -1273,7 +1273,7 @@ def __init__(
                 applies to the CustomJobs created in all the trials.
             metric_spec: Dict[str, str]
                 Required. Dictionary representing metrics to optimize. The dictionary key is the metric_id,
-                which is reported by your training job, and the dictionary value is the 
+                which is reported by your training job, and the dictionary value is the
                 optimization goal of the metric ('minimize' or 'maximize').
                 example: metric_spec = {'loss': 'minimize', 'accuracy': 'maximize'}
@@ -1283,15 +1283,15 @@ def __init__(
                 which is passed into your training job as a command line keyword argument, and the
                 dictionary value is the parameter specification of the metric.
-                
+
                 from google.cloud.aiplatform import hyperparameter_tuning as hpt
-                
+
                 parameter_spec={
                     'decay': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'),
                     'learning_rate': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'),
                     'batch_size': hpt.DiscreteParameterSpec(values=[4, 8, 16, 32, 64, 128], scale='linear')
                 }
-                
+
                 Supported parameter specifications can be found in aiplatform.hyperparameter_tuning.
                 These parameter specifications are currently supported:
                 DoubleParameterSpec, IntegerParameterSpec, CategoricalParameterSpec, DiscreteParameterSpec
@@ -1338,7 +1338,7 @@ def __init__(
                 HyperparameterTuningJob. If this is set, then all
                 resources created by the
                 HyperparameterTuningJob will be encrypted with
-                the provided encryption key. 
+                the provided encryption key.
         """
         super().__init__(project=project, location=location, credentials=credentials)
@@ -1404,7 +1404,7 @@ def run(
                 to workers leaving and joining a job.
             sync (bool):
                 Whether to execute this method synchronously. If False, this method
-                will unblock and it will be executed in a concurrent Future. 
+                will unblock and it will be executed in a concurrent Future.
""" if service_account: diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py index 3d5e9e510b..37c2ac3df0 100644 --- a/tests/unit/aiplatform/test_custom_job.py +++ b/tests/unit/aiplatform/test_custom_job.py @@ -25,7 +25,7 @@ from google.rpc import status_pb2 import test_training_jobs -from test_training_jobs import mock_python_package_to_gcs +from test_training_jobs import mock_python_package_to_gcs # noqa: F401 from google.cloud import aiplatform from google.cloud.aiplatform.compat.types import custom_job as gca_custom_job_compat @@ -260,7 +260,7 @@ def test_no_staging_bucket_raises(self): aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) with pytest.raises(RuntimeError): - job = aiplatform.CustomJob( + job = aiplatform.CustomJob( # noqa: F841 display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, ) @@ -316,7 +316,7 @@ def test_create_from_local_script_raises_with_no_staging_bucket( with pytest.raises(RuntimeError): # configuration on this is tested in test_training_jobs.py - job = aiplatform.CustomJob.from_local_script( + job = aiplatform.CustomJob.from_local_script( # noqa: F841 display_name=_TEST_DISPLAY_NAME, script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, diff --git a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py index d406e07efa..fcd15f93ac 100644 --- a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py +++ b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py @@ -21,7 +21,6 @@ from unittest import mock from unittest.mock import patch -from google.protobuf import duration_pb2 # type: ignore from google.rpc import status_pb2 from google.cloud import aiplatform From 1b3bad1910d47c58d8529d9583c6c74741f150bf Mon Sep 17 00:00:00 2001 From: Sasha Sobran Date: Mon, 17 May 2021 17:42:38 -0400 Subject: [PATCH 19/29] chore: resolve reviewers's comments --- google/cloud/aiplatform/hyperparameter_tuning.py | 2 +- google/cloud/aiplatform/jobs.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py index 56e9fcda4e..74a292e7b9 100644 --- a/google/cloud/aiplatform/hyperparameter_tuning.py +++ b/google/cloud/aiplatform/hyperparameter_tuning.py @@ -46,7 +46,7 @@ def __init__( @classmethod @abc.abstractmethod def _proto_parameter_value_class(self) -> proto.Message: - """The proto represenation of this parameter.""" + """The proto representation of this parameter.""" pass @property diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index ab59562331..0ea014e3d0 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -1183,7 +1183,7 @@ def job_spec(self): class HyperparameterTuningJob(_RunnableJob): - """AI Pltatform(Unified) HyperparameterTuning Job""" + """AI Pltaform (Unified) Hyperparameter Tuning Job.""" _resource_noun = "hyperparameterTuningJobs" _getter_method = "get_hyperparameter_tuning_job" @@ -1396,7 +1396,7 @@ def run( Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. timeout (int): - The maximum job running time in seconds. The default is 7 days. + Optional. The maximum job running time in seconds. The default is 7 days. restart_job_on_worker_restart (bool): Restarts the entire CustomJob if a worker gets restarted. 
                 This feature can be used by distributed training jobs that are not resilient
                 to workers leaving and joining a job.
@@ -1414,9 +1414,9 @@ def run(
             self._gca_resource.trial_job_spec.network = network
 
         if timeout or restart_job_on_worker_restart:
-            timeout = duration_pb2.Duration(seconds=timeout) if timeout else None
+            duration = duration_pb2.Duration(seconds=timeout) if timeout else None
             self._gca_resource.trial_job_spec.scheduling = gca_custom_job_compat.Scheduling(
-                timeout=timeout,
+                timeout=duration,
                 restart_job_on_worker_restart=restart_job_on_worker_restart,
             )

From a492741496d3b8f0b99e2d136b4b789dae666dac Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:12:42 -0400
Subject: [PATCH 20/29] Update google/cloud/aiplatform/hyperparameter_tuning.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/hyperparameter_tuning.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/google/cloud/aiplatform/hyperparameter_tuning.py b/google/cloud/aiplatform/hyperparameter_tuning.py
index 74a292e7b9..a7a0e641cd 100644
--- a/google/cloud/aiplatform/hyperparameter_tuning.py
+++ b/google/cloud/aiplatform/hyperparameter_tuning.py
@@ -204,9 +204,9 @@ def __init__(
                 4.0. This list should not contain more than 1,000 values.
 
             scale (str):
-            Required. The type of scaling that should be applied to this parameter.
+                Required. The type of scaling that should be applied to this parameter.
 
-            Accepts: 'linear', 'log', 'reverse_log'
+                Accepts: 'linear', 'log', 'reverse_log'
         """
 
         super().__init__()

From 24d3949da59b8c0ac8ff09e30a6fad38eccc4824 Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:12:53 -0400
Subject: [PATCH 21/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 0ea014e3d0..1ad51ad864 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1183,7 +1183,7 @@ def job_spec(self):
 
 
 class HyperparameterTuningJob(_RunnableJob):
-    """AI Pltaform (Unified) Hyperparameter Tuning Job."""
+    """AI Platform (Unified) Hyperparameter Tuning Job."""
 
     _resource_noun = "hyperparameterTuningJobs"
     _getter_method = "get_hyperparameter_tuning_job"

From 8e3e99405702711c460754ca639c3bf65f710623 Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:13:15 -0400
Subject: [PATCH 22/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 1ad51ad864..5ca036a0fe 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1008,9 +1008,12 @@ def from_local_script(
         Args:
             display_name (str):
                 Required. The user-defined name of this CustomJob.
-            script_path (str): Required. Local path to training script.
+            script_path (str):
+                Required. Local path to training script.
             container_uri (str):
                 Required. URI of the training container image to use for the custom job.
+            args (Optional[List[Union[str, float, int]]]):
+                Optional. Command line arguments to be passed to the Python task.
             requirements (Sequence[str]):
                 Optional. List of Python package dependencies of the script.
             environment_variables (Dict[str, str]):

From 2f355794020f6fcfa28ca6a5eb9475c3ec92a478 Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:13:42 -0400
Subject: [PATCH 23/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 5ca036a0fe..10c29457ab 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1005,6 +1005,21 @@ def from_local_script(
     ) -> "CustomJob":
         """Configures a custom job from a local script.
 
+        Example usage:
+        ```
+        job = aiplatform.CustomJob.from_local_script(
+            display_name="my-custom-job",
+            script_path="training_script.py",
+            container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest",
+            requirements=["gcsfs==0.7.1"],
+            replica_count=1,
+            args=['--dataset', 'gs://my-bucket/my-dataset',
+            '--model_output_uri', 'gs://my-bucket/model']
+        )
+
+        job.run()
+        ```
+
         Args:
             display_name (str):
                 Required. The user-defined name of this CustomJob.

From c30a80ffaec77ae77c8fd931671286dcfc07b6a4 Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:13:58 -0400
Subject: [PATCH 24/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 10c29457ab..b6dc131ac7 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1130,7 +1130,7 @@ def run(
         self,
         service_account: Optional[str] = None,
         network: Optional[str] = None,
-        timeout: Optional[int] = None,  # seconds
+        timeout: Optional[int] = None,
         restart_job_on_worker_restart: bool = False,
         sync: bool = True,
     ) -> None:

From 3a66659d8fd47170e823992d3aeaf514386cca6a Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:14:07 -0400
Subject: [PATCH 25/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index b6dc131ac7..20b3baa3eb 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1232,7 +1232,7 @@ def __init__(
         Example usage:
 
         ```
-        from google.cloud.aiplatform import hyperparamter_tuning as hpt
+        from google.cloud.aiplatform import hyperparameter_tuning as hpt

From 1c9168518bb005d017ed91a3b27caf8f66a0671f Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:14:44 -0400
Subject: [PATCH 26/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 20b3baa3eb..ff2e00b422 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1324,9 +1324,23 @@ def __init__(
                 If set to 0, AI Platform decides how many Trials
                 must fail before the whole job fails.
             search_algorithm (str):
-                The search algorithm specified for the Study.
-
-                Accepts: 'random', 'grid'
+                The search algorithm specified for the Study.
+                Accepts one of the following:
+                    `None` - If you do not specify an algorithm, your job uses
+                    the default AI Platform algorithm. The default algorithm
+                    applies Bayesian optimization to arrive at the optimal
+                    solution with a more effective search over the parameter space.
+
+                    'grid' - A simple grid search within the feasible space. This
+                    option is particularly useful if you want to specify a quantity
+                    of trials that is greater than the number of points in the
+                    feasible space. In such cases, if you do not specify a grid
+                    search, the AI Platform default algorithm may generate duplicate
+                    suggestions. To use grid search, all parameter specs must be
+                    of type `IntegerParameterSpec`, `CategoricalParameterSpec`,
+                    or `DiscreteParameterSpec`.
+
+                    'random' - A simple random search within the feasible space.
             measurement_selection (str):
                 This indicates which measurement to use if/when the service
                 automatically selects the final measurement from previously reported

From e6a53b5a4686b7cef9cb5335615ef2b138eebdaf Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:16:33 -0400
Subject: [PATCH 27/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index ff2e00b422..9230c14ae3 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1192,6 +1192,7 @@ def job_spec(self):
 _SEARCH_ALGORITHM_TO_PROTO_VALUE = {
     "random": gca_study_compat.StudySpec.Algorithm.RANDOM_SEARCH,
     "grid": gca_study_compat.StudySpec.Algorithm.GRID_SEARCH,
+    None: gca_study_compat.StudySpec.Algorithm.ALGORITHM_UNSPECIFIED,
 }
 
 _MEASUREMENT_SELECTION_TO_PROTO_VALUE = {

From 4e07e6dd6c86c708fbf4a224ea98258c16b4bdb6 Mon Sep 17 00:00:00 2001
From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Date: Tue, 18 May 2021 09:16:41 -0400
Subject: [PATCH 28/29] Update google/cloud/aiplatform/jobs.py

Co-authored-by: Vinny Senthil
---
 google/cloud/aiplatform/jobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 9230c14ae3..9c8412e372 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1220,7 +1220,7 @@ def __init__(
         max_trial_count: int,
         parallel_trial_count: int,
         max_failed_trial_count: int = 0,
-        search_algorithm: Optional[str] = "random",
+        search_algorithm: Optional[str] = None,
         measurement_selection: Optional[str] = "best",
         project: Optional[str] = None,
         location: Optional[str] = None,

From d5128b0f8b7a2a5a2d719cc99b4861f177acefbb Mon Sep 17 00:00:00 2001
From: Sasha Sobran
Date: Tue, 18 May 2021 09:52:52 -0400
Subject: [PATCH 29/29] chore: lint

---
 google/cloud/aiplatform/jobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
index 9c8412e372..7b1f5cccc5 100644
--- a/google/cloud/aiplatform/jobs.py
+++ b/google/cloud/aiplatform/jobs.py
@@ -1325,7 +1325,7 @@ def __init__(
                 If set to 0, AI Platform decides how many Trials
                 must fail before the whole job fails.
             search_algorithm (str):
-                The search algorithm specified for the Study. 
+                The search algorithm specified for the Study.
                 Accepts one of the following:
                     `None` - If you do not specify an algorithm, your job uses
                     the default AI Platform algorithm. The default algorithm
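A short usage sketch tying together the `search_algorithm` behavior introduced in the last few patches: `None` (the new default) maps to `ALGORITHM_UNSPECIFIED`, which lets the service apply its default Bayesian optimization, while `'grid'` is only valid when every parameter is an Integer, Categorical, or Discrete spec. The display names, image URI, and trial counts below are illustrative placeholders, and the snippet assumes `aiplatform.init(...)` has already been called with a project, location, and staging bucket.

```
from google.cloud import aiplatform
from google.cloud.aiplatform import hyperparameter_tuning as hpt

# Minimal single-replica trial job; the container image is a placeholder.
worker_pool_specs = [
    {
        "machine_spec": {"machine_type": "n1-standard-4"},
        "replica_count": 1,
        "container_spec": {"image_uri": "gcr.io/my-project/my-training-image"},
    }
]

custom_job = aiplatform.CustomJob(
    display_name="grid-search-trial-job",
    worker_pool_specs=worker_pool_specs,
)

# All three parameters are Integer/Categorical/Discrete specs, so 'grid'
# is allowed here; omitting search_algorithm would fall back to the
# service's default Bayesian optimization instead.
hp_job = aiplatform.HyperparameterTuningJob(
    display_name="grid-search-test",
    custom_job=custom_job,
    metric_spec={"loss": "minimize"},
    parameter_spec={
        "units": hpt.IntegerParameterSpec(min=4, max=128, scale="linear"),
        "activation": hpt.CategoricalParameterSpec(values=["relu", "selu"]),
        "batch_size": hpt.DiscreteParameterSpec(values=[16, 32, 64], scale="linear"),
    },
    max_trial_count=24,
    parallel_trial_count=3,
    search_algorithm="grid",
)

hp_job.run()
```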