feat: add custom_job samples (#69)
dizcology committed Nov 18, 2020
1 parent 7daacd5 commit fb165b3
Showing 4 changed files with 305 additions and 0 deletions.
76 changes: 76 additions & 0 deletions samples/snippets/create_training_pipeline_custom_job_sample.py
@@ -0,0 +1,76 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_create_training_pipeline_custom_job_sample]
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def create_training_pipeline_custom_job_sample(
project: str,
display_name: str,
model_display_name: str,
container_image_uri: str,
base_output_directory_prefix: str,
location: str = "us-central1",
api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
client_options = {"api_endpoint": api_endpoint}
# Initialize client that will be used to create and send requests.
# This client only needs to be created once, and can be reused for multiple requests.
client = aiplatform.gapic.PipelineServiceClient(client_options=client_options)

training_task_inputs_dict = {
"workerPoolSpecs": [
{
"replicaCount": 1,
"machineSpec": {"machineType": "n1-standard-4"},
"containerSpec": {
# A working docker image can be found at gs://cloud-samples-data/ai-platform/mnist_tfrecord/custom_job
"imageUri": container_image_uri,
"args": [
# AIP_MODEL_DIR is set by the service according to baseOutputDirectory.
"--model_dir=$(AIP_MODEL_DIR)",
],
},
}
],
"baseOutputDirectory": {
# The GCS location for outputs must be accessible by the project's AI Platform service account.
"output_uri_prefix": base_output_directory_prefix
},
}
training_task_inputs = json_format.ParseDict(training_task_inputs_dict, Value())

training_task_definition = "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml"
image_uri = "gcr.io/cloud-aiplatform/prediction/tf-cpu.1-15:latest"

training_pipeline = {
"display_name": display_name,
"training_task_definition": training_task_definition,
"training_task_inputs": training_task_inputs,
"model_to_upload": {
"display_name": model_display_name,
"container_spec": {"image_uri": image_uri,},
},
}
parent = f"projects/{project}/locations/{location}"
response = client.create_training_pipeline(
parent=parent, training_pipeline=training_pipeline
)
print("response:", response)


# [END aiplatform_create_training_pipeline_custom_job_sample]
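A hedged usage sketch of the sample above (not part of this commit); the project ID, container image URI, and GCS output prefix below are placeholders, not values from the diff:

# Hypothetical invocation with illustrative values only.
create_training_pipeline_custom_job_sample(
    project="my-project",  # placeholder project ID
    display_name="my-custom-training-pipeline",
    model_display_name="my-custom-model",
    container_image_uri="gcr.io/my-project/my-training-image:latest",  # placeholder training image
    base_output_directory_prefix="gs://my-bucket/pipeline-output",  # placeholder GCS prefix
)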
79 changes: 79 additions & 0 deletions samples/snippets/create_training_pipeline_custom_job_sample_test.py
@@ -0,0 +1,79 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from uuid import uuid4
import pytest
import os

import helpers

import create_training_pipeline_custom_job_sample

from google.cloud import aiplatform

PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
DISPLAY_NAME = f"temp_create_training_pipeline_custom_job_test_{uuid4()}"


@pytest.fixture
def shared_state():
state = {}
yield state


@pytest.fixture
def pipeline_client():
pipeline_client = aiplatform.gapic.PipelineServiceClient(
client_options={"api_endpoint": "us-central1-aiplatform.googleapis.com"}
)
return pipeline_client


@pytest.fixture(scope="function", autouse=True)
def teardown(shared_state, pipeline_client):
yield

training_pipeline_id = shared_state["training_pipeline_name"].split("/")[-1]

pipeline_client.cancel_training_pipeline(
name=shared_state["training_pipeline_name"]
)

# Waiting for training pipeline to be in CANCELLED state
helpers.wait_for_job_state(
get_job_method=pipeline_client.get_training_pipeline,
name=shared_state["training_pipeline_name"],
)

# Delete the training pipeline
pipeline_client.delete_training_pipeline(
name=shared_state["training_pipeline_name"]
)


def test_ucaip_generated_create_training_pipeline_custom_job_sample(capsys, shared_state):

create_training_pipeline_custom_job_sample.create_training_pipeline_custom_job_sample(
project=PROJECT_ID,
display_name=DISPLAY_NAME,
model_display_name=f"Temp Model for {DISPLAY_NAME}",
container_image_uri='gcr.io/ucaip-sample-tests/mnist-custom-job:latest',
base_output_directory_prefix='gs://ucaip-samples-us-central1/training_pipeline_output'
)

out, _ = capsys.readouterr()
assert "response:" in out

# Save resource name of the newly created training pipeline
shared_state["training_pipeline_name"] = helpers.get_name(out)
62 changes: 62 additions & 0 deletions samples/snippets/deploy_model_custom_trained_model_sample.py
@@ -0,0 +1,62 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_deploy_model_custom_trained_model_sample]
from google.cloud import aiplatform


def deploy_model_custom_trained_model_sample(
project: str,
endpoint_id: str,
model_name: str,
deployed_model_display_name: str,
location: str = "us-central1",
api_endpoint: str = "us-central1-aiplatform.googleapis.com",
timeout: int = 7200,
):
client_options = {"api_endpoint": api_endpoint}
# Initialize client that will be used to create and send requests.
# This client only needs to be created once, and can be reused for multiple requests.
client = aiplatform.gapic.EndpointServiceClient(client_options=client_options)
deployed_model = {
# format: 'projects/{project}/locations/{location}/models/{model}'
"model": model_name,
"display_name": deployed_model_display_name,
# `dedicated_resources` must be used for non-AutoML models
"dedicated_resources": {
"min_replica_count": 1,
"machine_spec": {
"machine_type": "n1-standard-2",
# Accelerators can be used only if the model specifies a GPU image.
# 'accelerator_type': aiplatform.gapic.AcceleratorType.NVIDIA_TESLA_K80,
# 'accelerator_count': 1,
},
},
}
# key '0' assigns traffic for the newly deployed model
# Traffic percentage values must add up to 100
# Leave dictionary empty if endpoint should not accept any traffic
traffic_split = {"0": 100}
endpoint = client.endpoint_path(
project=project, location=location, endpoint=endpoint_id
)
response = client.deploy_model(
endpoint=endpoint, deployed_model=deployed_model, traffic_split=traffic_split
)
print("Long running operation:", response.operation.name)
deploy_model_response = response.result(timeout=timeout)
print("deploy_model_response:", deploy_model_response)


# [END aiplatform_deploy_model_custom_trained_model_sample]
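A hedged usage sketch for the deploy sample above; the endpoint ID and model resource name are placeholders:

# Hypothetical invocation with illustrative values only.
deploy_model_custom_trained_model_sample(
    project="my-project",  # placeholder project ID
    endpoint_id="1234567890",  # placeholder endpoint ID
    model_name="projects/my-project/locations/us-central1/models/1234567890",  # placeholder model resource name
    deployed_model_display_name="my-deployed-model",
)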
88 changes: 88 additions & 0 deletions samples/snippets/deploy_model_custom_trained_model_sample_test.py
@@ -0,0 +1,88 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import aiplatform
import deploy_model_custom_trained_model_sample

from uuid import uuid4
import pytest
import os

import helpers

PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
LOCATION = "us-central1"
PARENT = f"projects/{PROJECT_ID}/locations/{LOCATION}"
DISPLAY_NAME = f"temp_deploy_model_custom_trained_model_test_{uuid4()}"

# Resource Name of "permanent_custom_mnist_model"
MODEL_NAME = "projects/580378083368/locations/us-central1/models/4992732768149438464"


@pytest.fixture
def shared_state():
state = {}
yield state


@pytest.fixture
def endpoint_client():
client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
endpoint_client = aiplatform.gapic.EndpointServiceClient(
client_options=client_options
)
return endpoint_client


@pytest.fixture(scope="function", autouse=True)
def setup(shared_state, endpoint_client):
create_endpoint_response = endpoint_client.create_endpoint(
parent=PARENT, endpoint={"display_name": DISPLAY_NAME}
)
shared_state["endpoint"] = create_endpoint_response.result().name


def test_ucaip_generated_deploy_model_custom_trained_model_sample(capsys, shared_state):

assert shared_state["endpoint"] is not None

# Deploy existing image classification model to endpoint
deploy_model_custom_trained_model_sample.deploy_model_custom_trained_model_sample(
project=PROJECT_ID,
model_name=MODEL_NAME,
deployed_model_display_name=DISPLAY_NAME,
endpoint_id=shared_state["endpoint"].split("/")[-1],
)

# Store deployed model ID for undeploying
out, _ = capsys.readouterr()
assert "deploy_model_response" in out

shared_state["deployed_model_id"] = helpers.get_name(out=out, key="id")


@pytest.fixture(scope="function", autouse=True)
def teardown(shared_state, endpoint_client):
yield

undeploy_model_operation = endpoint_client.undeploy_model(
deployed_model_id=shared_state["deployed_model_id"],
endpoint=shared_state["endpoint"],
)
undeploy_model_operation.result()

# Delete the endpoint
endpoint_client.delete_endpoint(
name=shared_state["endpoint"]
)
