Skip to content

Commit

Permalink
feat: Added create_training_pipeline_custom_job_sample and create_tra…
Browse files Browse the repository at this point in the history
…ining_pipeline_custom_training_managed_dataset_sample and fixed create_training_pipeline_image_classification_sample (#343)

* Added create_and_import_dataset_tabular_gcs_sample.py

* Added create_and_import_dataset_tabular_bigquery_sample.py

* Added create_training_pipeline_custom_job_sample.py and tweaked other tests

* Added create_training_pipeline_custom_training_managed_dataset_sample and fixed unmanaged sample

* Fixed args

* Deleted duplicated samples

* Added more args to samples

* Ran black

* Ran linter

Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
  • Loading branch information
ivanmkc and sasha-gitg committed Apr 27, 2021
1 parent c39eab2 commit 1c6b998
Show file tree
Hide file tree
Showing 8 changed files with 301 additions and 2 deletions.
13 changes: 13 additions & 0 deletions samples/model-builder/conftest.py
Expand Up @@ -152,6 +152,19 @@ def mock_run_automl_image_training_job():
yield mock


@pytest.fixture
def mock_init_custom_training_job():
    """Patch ``CustomTrainingJob.__init__`` and yield the replacement mock.

    The mock returns ``None`` (as a real ``__init__`` must), so the sample
    can construct a job object without running the real constructor.
    """
    with patch.object(
        aiplatform.training_jobs.CustomTrainingJob, "__init__", return_value=None
    ) as init_mock:
        yield init_mock


@pytest.fixture
def mock_run_custom_training_job():
    """Patch ``CustomTrainingJob.run`` and yield the replacement mock.

    Tests use the yielded mock to assert on the keyword arguments the
    sample forwards to ``run``.
    """
    with patch.object(
        aiplatform.training_jobs.CustomTrainingJob, "run"
    ) as run_mock:
        yield run_mock


"""
----------------------------------------------------------------------------
Model Fixtures
Expand Down
@@ -0,0 +1,69 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional, Union

from google.cloud import aiplatform


# [START aiplatform_sdk_create_training_pipeline_custom_job_sample]
def create_training_pipeline_custom_job_sample(
    project: str,
    location: str,
    display_name: str,
    script_path: str,
    container_uri: str,
    model_serving_container_image_uri: str,
    model_display_name: Optional[str] = None,
    args: Optional[List[Union[str, float, int]]] = None,
    replica_count: int = 0,
    machine_type: str = "n1-standard-4",
    accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED",
    accelerator_count: int = 0,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    sync: bool = True,
):
    """Create a ``CustomTrainingJob`` from a script and run it without a dataset.

    Args:
        project: Project passed to ``aiplatform.init``.
        location: Location passed to ``aiplatform.init``.
        display_name: Display name of the training job.
        script_path: Path of the training script handed to the job.
        container_uri: Container image URI handed to the job.
        model_serving_container_image_uri: Serving container image URI
            handed to the job.
        model_display_name: Forwarded to ``run`` as ``model_display_name``.
        args: Forwarded to ``run`` as ``args``.
        replica_count: Forwarded to ``run`` as ``replica_count``.
        machine_type: Forwarded to ``run`` as ``machine_type``.
        accelerator_type: Forwarded to ``run`` as ``accelerator_type``.
        accelerator_count: Forwarded to ``run`` as ``accelerator_count``.
        training_fraction_split: Forwarded to ``run``.
        validation_fraction_split: Forwarded to ``run``.
        test_fraction_split: Forwarded to ``run``.
        sync: Forwarded to ``run`` as ``sync``.

    Returns:
        The model object returned by ``CustomTrainingJob.run``.
    """
    aiplatform.init(project=project, location=location)

    training_job = aiplatform.CustomTrainingJob(
        display_name=display_name,
        script_path=script_path,
        container_uri=container_uri,
        model_serving_container_image_uri=model_serving_container_image_uri,
    )

    trained_model = training_job.run(
        model_display_name=model_display_name,
        args=args,
        replica_count=replica_count,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        sync=sync,
    )

    # Wait on the returned model before reading its attributes.
    trained_model.wait()

    print(trained_model.display_name)
    print(trained_model.resource_name)
    print(trained_model.uri)
    return trained_model


# [END aiplatform_sdk_create_training_pipeline_custom_job_sample]
62 changes: 62 additions & 0 deletions samples/model-builder/create_training_pipeline_custom_job_test.py
@@ -0,0 +1,62 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import create_training_pipeline_custom_job_sample
import test_constants as constants


def test_create_training_pipeline_custom_job_sample(
    mock_sdk_init,
    mock_init_custom_training_job,
    mock_run_custom_training_job,
):
    """Check that the sample forwards its arguments to init, the job constructor and run."""
    run_sample = (
        create_training_pipeline_custom_job_sample.create_training_pipeline_custom_job_sample
    )

    run_sample(
        project=constants.PROJECT,
        location=constants.LOCATION,
        display_name=constants.DISPLAY_NAME,
        args=constants.ARGS,
        script_path=constants.SCRIPT_PATH,
        container_uri=constants.CONTAINER_URI,
        model_serving_container_image_uri=constants.CONTAINER_URI,
        model_display_name=constants.DISPLAY_NAME_2,
        replica_count=constants.REPLICA_COUNT,
        machine_type=constants.MACHINE_TYPE,
        accelerator_type=constants.ACCELERATOR_TYPE,
        accelerator_count=constants.ACCELERATOR_COUNT,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
    )

    # SDK initialisation receives only the project and location.
    mock_sdk_init.assert_called_once_with(
        project=constants.PROJECT, location=constants.LOCATION
    )

    # The job constructor receives the script and container settings.
    mock_init_custom_training_job.assert_called_once_with(
        display_name=constants.DISPLAY_NAME,
        script_path=constants.SCRIPT_PATH,
        container_uri=constants.CONTAINER_URI,
        model_serving_container_image_uri=constants.CONTAINER_URI,
    )

    # run() receives everything else; sync is not passed above, so the
    # sample's default of True is expected here.
    mock_run_custom_training_job.assert_called_once_with(
        model_display_name=constants.DISPLAY_NAME_2,
        replica_count=constants.REPLICA_COUNT,
        machine_type=constants.MACHINE_TYPE,
        accelerator_type=constants.ACCELERATOR_TYPE,
        accelerator_count=constants.ACCELERATOR_COUNT,
        args=constants.ARGS,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
        sync=True,
    )
@@ -0,0 +1,73 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional, Union

from google.cloud import aiplatform


# [START aiplatform_sdk_create_training_pipeline_custom_training_managed_dataset_sample]
def create_training_pipeline_custom_training_managed_dataset_sample(
    project: str,
    location: str,
    display_name: str,
    script_path: str,
    container_uri: str,
    model_serving_container_image_uri: str,
    dataset_id: int,
    model_display_name: Optional[str] = None,
    args: Optional[List[Union[str, float, int]]] = None,
    replica_count: int = 0,
    machine_type: str = "n1-standard-4",
    accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED",
    accelerator_count: int = 0,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    sync: bool = True,
):
    """Create a ``CustomTrainingJob`` and run it against a managed image dataset.

    Args:
        project: Project passed to ``aiplatform.init``.
        location: Location passed to ``aiplatform.init``.
        display_name: Display name of the training job.
        script_path: Path of the training script handed to the job.
        container_uri: Container image URI handed to the job.
        model_serving_container_image_uri: Serving container image URI
            handed to the job.
        dataset_id: Identifier used to look up the ``ImageDataset`` that is
            passed to ``run`` as ``dataset``.
        model_display_name: Forwarded to ``run`` as ``model_display_name``.
        args: Forwarded to ``run`` as ``args``.
        replica_count: Forwarded to ``run`` as ``replica_count``.
        machine_type: Forwarded to ``run`` as ``machine_type``.
        accelerator_type: Forwarded to ``run`` as ``accelerator_type``.
        accelerator_count: Forwarded to ``run`` as ``accelerator_count``.
        training_fraction_split: Forwarded to ``run``.
        validation_fraction_split: Forwarded to ``run``.
        test_fraction_split: Forwarded to ``run``.
        sync: Forwarded to ``run`` as ``sync``.

    Returns:
        The model object returned by ``CustomTrainingJob.run``.
    """
    aiplatform.init(project=project, location=location)

    job = aiplatform.CustomTrainingJob(
        display_name=display_name,
        script_path=script_path,
        container_uri=container_uri,
        model_serving_container_image_uri=model_serving_container_image_uri,
    )

    # Look up the existing managed dataset that the job will train on.
    my_image_ds = aiplatform.ImageDataset(dataset_id)

    model = job.run(
        dataset=my_image_ds,
        model_display_name=model_display_name,
        args=args,
        replica_count=replica_count,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        sync=sync,
    )

    # Wait on the returned model before reading its attributes.
    model.wait()

    print(model.display_name)
    print(model.resource_name)
    print(model.uri)
    return model


# [END aiplatform_sdk_create_training_pipeline_custom_training_managed_dataset_sample]
@@ -0,0 +1,70 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import create_training_pipeline_custom_training_managed_dataset_sample
import test_constants as constants


def test_create_training_pipeline_custom_job_sample(
    mock_sdk_init,
    mock_image_dataset,
    mock_init_custom_training_job,
    mock_run_custom_training_job,
    mock_get_image_dataset,
):
    """Check that the managed-dataset sample looks up the dataset and forwards its arguments."""
    sample_module = create_training_pipeline_custom_training_managed_dataset_sample
    run_sample = (
        sample_module.create_training_pipeline_custom_training_managed_dataset_sample
    )

    run_sample(
        project=constants.PROJECT,
        location=constants.LOCATION,
        display_name=constants.DISPLAY_NAME,
        args=constants.ARGS,
        script_path=constants.SCRIPT_PATH,
        container_uri=constants.CONTAINER_URI,
        model_serving_container_image_uri=constants.CONTAINER_URI,
        dataset_id=constants.RESOURCE_ID,
        model_display_name=constants.DISPLAY_NAME_2,
        replica_count=constants.REPLICA_COUNT,
        machine_type=constants.MACHINE_TYPE,
        accelerator_type=constants.ACCELERATOR_TYPE,
        accelerator_count=constants.ACCELERATOR_COUNT,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
    )

    # The dataset is looked up with the id given to the sample.
    mock_get_image_dataset.assert_called_once_with(constants.RESOURCE_ID)

    # SDK initialisation receives only the project and location.
    mock_sdk_init.assert_called_once_with(
        project=constants.PROJECT, location=constants.LOCATION
    )

    # The job constructor receives the script and container settings.
    mock_init_custom_training_job.assert_called_once_with(
        display_name=constants.DISPLAY_NAME,
        script_path=constants.SCRIPT_PATH,
        container_uri=constants.CONTAINER_URI,
        model_serving_container_image_uri=constants.CONTAINER_URI,
    )

    # run() receives the dataset plus the remaining arguments; sync is not
    # passed above, so the sample's default of True is expected here.
    mock_run_custom_training_job.assert_called_once_with(
        dataset=mock_image_dataset,
        model_display_name=constants.DISPLAY_NAME_2,
        args=constants.ARGS,
        replica_count=constants.REPLICA_COUNT,
        machine_type=constants.MACHINE_TYPE,
        accelerator_type=constants.ACCELERATOR_TYPE,
        accelerator_count=constants.ACCELERATOR_COUNT,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
        sync=True,
    )
Expand Up @@ -12,16 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

from google.cloud import aiplatform


# [START aiplatform_sdk_create_training_pipeline_image_classification_sample]
def create_training_pipeline_image_classification_sample(
project: str,
location: str,
display_name: str,
dataset_id: int,
location: str = "us-central1",
model_display_name: str = None,
model_display_name: Optional[str] = None,
training_fraction_split: float = 0.8,
validation_fraction_split: float = 0.1,
test_fraction_split: float = 0.1,
Expand Down
Expand Up @@ -27,6 +27,7 @@ def test_create_training_pipeline_image_classification_sample(

create_training_pipeline_image_classification_sample.create_training_pipeline_image_classification_sample(
project=constants.PROJECT,
location=constants.LOCATION,
display_name=constants.DISPLAY_NAME,
dataset_id=constants.RESOURCE_ID,
model_display_name=constants.DISPLAY_NAME_2,
Expand Down
9 changes: 9 additions & 0 deletions samples/model-builder/test_constants.py
Expand Up @@ -40,6 +40,7 @@
# Training job resource name, built from PARENT and RESOURCE_ID.
TRAINING_JOB_NAME = f"{PARENT}/trainingJobs/{RESOURCE_ID}"

# Example Cloud Storage (gs://) and BigQuery (bq://) URIs for data sources and output.
GCS_SOURCES = ["gs://bucket1/source1.jsonl", "gs://bucket7/source4.jsonl"]
BIGQUERY_SOURCE = "bq://bigquery-public-data.ml_datasets.iris"
GCS_DESTINATION = "gs://bucket3/output-dir/"

# Value the sample tests pass as training_fraction_split.
TRAINING_FRACTION_SPLIT = 0.7
Expand All @@ -51,3 +52,11 @@
# Crypto key resource name, built from PARENT, RESOURCE_ID and RESOURCE_ID_2.
ENCRYPTION_SPEC_KEY_NAME = f"{PARENT}/keyRings/{RESOURCE_ID}/cryptoKeys/{RESOURCE_ID_2}"

# Sample input text for prediction tests.
PREDICTION_TEXT_INSTANCE = "This is some text for testing NLP prediction output"

# Arguments shared by the custom training job sample tests: the training
# script and container, the script's command-line args, and the compute
# settings (accelerator type left unspecified with a count of zero).
SCRIPT_PATH = "task.py"
CONTAINER_URI = "gcr.io/my_project/my_image:latest"
ARGS = ["--tfds", "tf_flowers:3.*.*"]
REPLICA_COUNT = 0
MACHINE_TYPE = "n1-standard-4"
ACCELERATOR_TYPE = "ACCELERATOR_TYPE_UNSPECIFIED"
ACCELERATOR_COUNT = 0

0 comments on commit 1c6b998

Please sign in to comment.