From fb165b3632119b361a1936f367128f7146b49685 Mon Sep 17 00:00:00 2001 From: Yu-Han Liu Date: Tue, 17 Nov 2020 16:17:17 -0800 Subject: [PATCH] feat: add custom_job samples (#69) --- ...ate_training_pipeline_custom_job_sample.py | 76 ++++++++++++++++ ...raining_pipeline_custom_job_sample_test.py | 79 +++++++++++++++++ ...eploy_model_custom_trained_model_sample.py | 62 +++++++++++++ ..._model_custom_trained_model_sample_test.py | 88 +++++++++++++++++++ 4 files changed, 305 insertions(+) create mode 100644 samples/snippets/create_training_pipeline_custom_job_sample.py create mode 100644 samples/snippets/create_training_pipeline_custom_job_sample_test.py create mode 100644 samples/snippets/deploy_model_custom_trained_model_sample.py create mode 100644 samples/snippets/deploy_model_custom_trained_model_sample_test.py diff --git a/samples/snippets/create_training_pipeline_custom_job_sample.py b/samples/snippets/create_training_pipeline_custom_job_sample.py new file mode 100644 index 0000000000..b8918f5b09 --- /dev/null +++ b/samples/snippets/create_training_pipeline_custom_job_sample.py @@ -0,0 +1,76 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START aiplatform_create_training_pipeline_custom_job_sample] +from google.cloud import aiplatform +from google.protobuf import json_format +from google.protobuf.struct_pb2 import Value + + +def create_training_pipeline_custom_job_sample( + project: str, + display_name: str, + model_display_name: str, + container_image_uri: str, + base_output_directory_prefix: str, + location: str = "us-central1", + api_endpoint: str = "us-central1-aiplatform.googleapis.com", +): + client_options = {"api_endpoint": api_endpoint} + # Initialize client that will be used to create and send requests. + # This client only needs to be created once, and can be reused for multiple requests. + client = aiplatform.gapic.PipelineServiceClient(client_options=client_options) + + training_task_inputs_dict = { + "workerPoolSpecs": [ + { + "replicaCount": 1, + "machineSpec": {"machineType": "n1-standard-4"}, + "containerSpec": { + # A working docker image can be found at gs://cloud-samples-data/ai-platform/mnist_tfrecord/custom_job + "imageUri": container_image_uri, + "args": [ + # AIP_MODEL_DIR is set by the service according to baseOutputDirectory. + "--model_dir=$(AIP_MODEL_DIR)", + ], + }, + } + ], + "baseOutputDirectory": { + # The GCS location for outputs must be accessible by the project's AI Platform service account. 
+ "output_uri_prefix": base_output_directory_prefix + }, + } + training_task_inputs = json_format.ParseDict(training_task_inputs_dict, Value()) + + training_task_definition = "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml" + image_uri = "gcr.io/cloud-aiplatform/prediction/tf-cpu.1-15:latest" + + training_pipeline = { + "display_name": display_name, + "training_task_definition": training_task_definition, + "training_task_inputs": training_task_inputs, + "model_to_upload": { + "display_name": model_display_name, + "container_spec": {"image_uri": image_uri,}, + }, + } + parent = f"projects/{project}/locations/{location}" + response = client.create_training_pipeline( + parent=parent, training_pipeline=training_pipeline + ) + print("response:", response) + + +# [END aiplatform_create_training_pipeline_custom_job_sample] diff --git a/samples/snippets/create_training_pipeline_custom_job_sample_test.py b/samples/snippets/create_training_pipeline_custom_job_sample_test.py new file mode 100644 index 0000000000..fc593a5d44 --- /dev/null +++ b/samples/snippets/create_training_pipeline_custom_job_sample_test.py @@ -0,0 +1,79 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from uuid import uuid4 +import pytest +import os + +import helpers + +import create_training_pipeline_custom_job_sample + +from google.cloud import aiplatform + +PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") +DISPLAY_NAME = f"temp_create_training_pipeline_custom_job_test_{uuid4()}" + + +@pytest.fixture +def shared_state(): + state = {} + yield state + + +@pytest.fixture +def pipeline_client(): + pipeline_client = aiplatform.gapic.PipelineServiceClient( + client_options={"api_endpoint": "us-central1-aiplatform.googleapis.com"} + ) + return pipeline_client + + +@pytest.fixture(scope="function", autouse=True) +def teardown(shared_state, pipeline_client): + yield + + training_pipeline_id = shared_state["training_pipeline_name"].split("/")[-1] + + pipeline_client.cancel_training_pipeline( + name=shared_state["training_pipeline_name"] + ) + + # Waiting for training pipeline to be in CANCELLED state + helpers.wait_for_job_state( + get_job_method=pipeline_client.get_training_pipeline, + name=shared_state["training_pipeline_name"], + ) + + # Delete the training pipeline + pipeline_client.delete_training_pipeline( + name=shared_state["training_pipeline_name"] + ) + + +def test_ucaip_generated_create_training_pipeline_custom_job_sample(capsys, shared_state): + + create_training_pipeline_custom_job_sample.create_training_pipeline_custom_job_sample( + project=PROJECT_ID, + display_name=DISPLAY_NAME, + model_display_name=f"Temp Model for {DISPLAY_NAME}", + container_image_uri='gcr.io/ucaip-sample-tests/mnist-custom-job:latest', + base_output_directory_prefix='gs://ucaip-samples-us-central1/training_pipeline_output' + ) + + out, _ = capsys.readouterr() + assert "response:" in out + + # Save resource name of the newly created training pipeline + shared_state["training_pipeline_name"] = helpers.get_name(out) diff --git a/samples/snippets/deploy_model_custom_trained_model_sample.py b/samples/snippets/deploy_model_custom_trained_model_sample.py new file mode 100644 index 
0000000000..439bdc802e --- /dev/null +++ b/samples/snippets/deploy_model_custom_trained_model_sample.py @@ -0,0 +1,62 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START aiplatform_deploy_model_custom_trained_model_sample] +from google.cloud import aiplatform + + +def deploy_model_custom_trained_model_sample( + project: str, + endpoint_id: str, + model_name: str, + deployed_model_display_name: str, + location: str = "us-central1", + api_endpoint: str = "us-central1-aiplatform.googleapis.com", + timeout: int = 7200, +): + client_options = {"api_endpoint": api_endpoint} + # Initialize client that will be used to create and send requests. + # This client only needs to be created once, and can be reused for multiple requests. + client = aiplatform.gapic.EndpointServiceClient(client_options=client_options) + deployed_model = { + # format: 'projects/{project}/locations/{location}/models/{model}' + "model": model_name, + "display_name": deployed_model_display_name, + # `dedicated_resources` must be used for non-AutoML models + "dedicated_resources": { + "min_replica_count": 1, + "machine_spec": { + "machine_type": "n1-standard-2", + # Accelerators can be used only if the model specifies a GPU image. 
+ # 'accelerator_type': aiplatform.gapic.AcceleratorType.NVIDIA_TESLA_K80, + # 'accelerator_count': 1, + }, + }, + } + # key '0' assigns traffic for the newly deployed model + # Traffic percentage values must add up to 100 + # Leave dictionary empty if endpoint should not accept any traffic + traffic_split = {"0": 100} + endpoint = client.endpoint_path( + project=project, location=location, endpoint=endpoint_id + ) + response = client.deploy_model( + endpoint=endpoint, deployed_model=deployed_model, traffic_split=traffic_split + ) + print("Long running operation:", response.operation.name) + deploy_model_response = response.result(timeout=timeout) + print("deploy_model_response:", deploy_model_response) + + +# [END aiplatform_deploy_model_custom_trained_model_sample] diff --git a/samples/snippets/deploy_model_custom_trained_model_sample_test.py b/samples/snippets/deploy_model_custom_trained_model_sample_test.py new file mode 100644 index 0000000000..49f096fdb6 --- /dev/null +++ b/samples/snippets/deploy_model_custom_trained_model_sample_test.py @@ -0,0 +1,88 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 

import os
from uuid import uuid4

import pytest
from google.cloud import aiplatform

import deploy_model_custom_trained_model_sample
import helpers

PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
LOCATION = "us-central1"
PARENT = f"projects/{PROJECT_ID}/locations/{LOCATION}"
DISPLAY_NAME = f"temp_deploy_model_custom_trained_model_test_{uuid4()}"

# Resource Name of "permanent_custom_mnist_model"
MODEL_NAME = "projects/580378083368/locations/us-central1/models/4992732768149438464"


@pytest.fixture
def shared_state():
    """Mutable dict shared between setup, the test, and teardown."""
    state = {}
    yield state


@pytest.fixture
def endpoint_client():
    """EndpointServiceClient pointed at the us-central1 regional endpoint."""
    client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
    endpoint_client = aiplatform.gapic.EndpointServiceClient(
        client_options=client_options
    )
    return endpoint_client


@pytest.fixture(scope="function", autouse=True)
def setup(shared_state, endpoint_client):
    """Create a temporary endpoint for the test to deploy to."""
    create_endpoint_response = endpoint_client.create_endpoint(
        parent=PARENT, endpoint={"display_name": DISPLAY_NAME}
    )
    shared_state["endpoint"] = create_endpoint_response.result().name


def test_ucaip_generated_deploy_model_custom_trained_model_sample(capsys, shared_state):

    assert shared_state["endpoint"] is not None

    # Deploy existing image classification model to endpoint
    deploy_model_custom_trained_model_sample.deploy_model_custom_trained_model_sample(
        project=PROJECT_ID,
        model_name=MODEL_NAME,
        deployed_model_display_name=DISPLAY_NAME,
        endpoint_id=shared_state["endpoint"].split("/")[-1],
    )

    # Store deployed model ID for undeploying
    out, _ = capsys.readouterr()
    assert "deploy_model_response" in out

    shared_state["deployed_model_id"] = helpers.get_name(out=out, key="id")


@pytest.fixture(scope="function", autouse=True)
def teardown(shared_state, endpoint_client):
    """Undeploy the model (if deployed) and always delete the temp endpoint."""
    yield

    # If the test failed before recording the deployed model id, skip undeploy
    # instead of raising KeyError — but still delete the endpoint so the
    # resource created in setup() is not leaked.
    deployed_model_id = shared_state.get("deployed_model_id")
    if deployed_model_id is not None:
        undeploy_model_operation = endpoint_client.undeploy_model(
            deployed_model_id=deployed_model_id,
            endpoint=shared_state["endpoint"],
        )
        undeploy_model_operation.result()

    # Delete the endpoint
    endpoint_client.delete_endpoint(name=shared_state["endpoint"])