Commit
chore: End-to-End Tabular System Test (#610)
* Init end-to-end tabular integration test + script
* Checkpoint, add assertions on prediction response
* Add a presubmit config for system test changes
* Lint
* Address PR comments, split out E2E base class
* 🦉 Updates from OwlBot (see https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md)
* Drop sample lint type hint requirement
* Address comments, add type hints to base class
* Update prediction response inspection
* 🦉 Updates from OwlBot (see https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md)
* Update base teardown fixture to delete Endpoints first
* Change pytest-xdist to loadscope, add cfg header

Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com>
Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 2f89343, commit d97da41
Showing 9 changed files with 354 additions and 5 deletions.
@@ -0,0 +1,13 @@
# Format: //devtools/kokoro/config/proto/build.proto

# Run system tests when test files are modified
env_vars: {
  key: "NOX_SESSION"
  value: "system-3.8"
}

# Run system tests in parallel, splitting up by file
env_vars: {
  key: "PYTEST_ADDOPTS"
  value: "-n=auto --dist=loadscope"
}
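For context on the second setting: with pytest-xdist, `-n=auto` starts one worker per available CPU, and `--dist=loadscope` groups tests by module (and by class for test methods) so that each group runs on a single worker. This keeps tests that share expensive class-scoped fixtures from being split across processes. A minimal sketch of the behavior being relied on, using a hypothetical test class (not part of this commit):

import pytest


class TestGroupedOnOneWorker:
    # Under --dist=loadscope, both tests below are assigned to the same
    # xdist worker, so this class-scoped fixture is created only once.
    @pytest.fixture(scope="class")
    def expensive_resource(self):
        yield "created once per class"

    def test_first(self, expensive_resource):
        assert expensive_resource

    def test_second(self, expensive_resource):
        assert expensive_resource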
@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import abc
import importlib
import os
import pytest
import uuid
from typing import Any, Dict, Generator

from google.api_core import exceptions
from google.cloud import aiplatform
from google.cloud import storage
from google.cloud.aiplatform import initializer

_PROJECT = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
_LOCATION = "us-central1"


class TestEndToEnd(metaclass=abc.ABCMeta):
    @property
    @classmethod
    @abc.abstractmethod
    def _temp_prefix(cls) -> str:
        """Prefix for the staging bucket and display names created by this end-to-end test.

        Keep the string as short as possible and use kebab case, starting with a lowercase letter.
        Example: `"temp-vertex-hpt-test"`
        """
        pass

    def setup_method(self):
        importlib.reload(initializer)
        importlib.reload(aiplatform)

    @pytest.fixture()
    def shared_state(self) -> Generator[Dict[str, Any], None, None]:
        shared_state = {}
        yield shared_state

    @pytest.fixture()
    def prepare_staging_bucket(
        self, shared_state: Dict[str, Any]
    ) -> Generator[storage.bucket.Bucket, None, None]:
        """Create a staging bucket and store the bucket resource object in shared state."""

        staging_bucket_name = f"{self._temp_prefix.lower()}-{uuid.uuid4()}"[:63]
        shared_state["staging_bucket_name"] = staging_bucket_name

        storage_client = storage.Client(project=_PROJECT)
        shared_state["storage_client"] = storage_client

        shared_state["bucket"] = storage_client.create_bucket(
            staging_bucket_name, location=_LOCATION
        )
        yield

    @pytest.fixture()
    def delete_staging_bucket(self, shared_state: Dict[str, Any]):
        """Delete the staging bucket and all its contents."""

        yield

        # Get the staging bucket used for testing and wipe it
        bucket = shared_state["bucket"]
        bucket.delete(force=True)

    @pytest.fixture(autouse=True)
    def teardown(self, shared_state: Dict[str, Any]):
        """Delete every Vertex AI resource created during the test."""

        yield

        # Bring all Endpoints to the front of the list so Models are
        # undeployed before we attempt deletion
        shared_state["resources"].sort(
            key=lambda r: 1 if isinstance(r, aiplatform.Endpoint) else 2
        )

        for resource in shared_state["resources"]:
            try:
                if isinstance(resource, aiplatform.Endpoint):
                    resource.delete(force=True)  # Undeploy model then delete endpoint
                else:
                    resource.delete()
            except exceptions.GoogleAPIError as e:
                print(f"Could not delete resource: {resource} due to: {e}")
@@ -0,0 +1,160 @@
# -*- coding: utf-8 -*-

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import uuid
from urllib import request

import pytest

from google.cloud import aiplatform
from tests.system.aiplatform import e2e_base


_BLOB_PATH = "california-housing-data.csv"
_DATASET_SRC = "https://dl.google.com/mlcc/mledu-datasets/california_housing_train.csv"
_DIR_NAME = os.path.dirname(os.path.abspath(__file__))
_LOCAL_TRAINING_SCRIPT_PATH = os.path.join(
    _DIR_NAME, "test_resources/california_housing_training_script.py"
)


@pytest.mark.usefixtures("prepare_staging_bucket", "delete_staging_bucket", "teardown")
class TestEndToEndTabular(e2e_base.TestEndToEnd):
    """End-to-end system test of the Vertex SDK with tabular data, adapted from
    reference notebook http://shortn/_eyoNx3SN0X"""

    _temp_prefix = "temp-vertex-sdk-e2e-tabular"

    def test_end_to_end_tabular(self, shared_state):
        """Build a dataset, train a custom and an AutoML model, deploy both, and get predictions."""

        assert shared_state["bucket"]
        bucket = shared_state["bucket"]

        blob = bucket.blob(_BLOB_PATH)

        # Download the CSV file into memory and save it directly to the staging bucket
        with request.urlopen(_DATASET_SRC) as response:
            data = response.read()
            blob.upload_from_string(data)

        # Collection of resources generated by this test, to be deleted during teardown
        shared_state["resources"] = []

        aiplatform.init(
            project=e2e_base._PROJECT,
            location=e2e_base._LOCATION,
            staging_bucket=shared_state["staging_bucket_name"],
        )

        # Create and import a single managed dataset shared by both training jobs

        ds = aiplatform.TabularDataset.create(
            display_name=f"{self._temp_prefix}-dataset-{uuid.uuid4()}",
            gcs_source=[f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}'],
            sync=False,
        )

        shared_state["resources"].extend([ds])

        # Define both training jobs

        custom_job = aiplatform.CustomTrainingJob(
            display_name=f"{self._temp_prefix}-train-housing-custom-{uuid.uuid4()}",
            script_path=_LOCAL_TRAINING_SCRIPT_PATH,
            container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest",
            requirements=["gcsfs==0.7.1"],
            model_serving_container_image_uri="gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest",
        )

        automl_job = aiplatform.AutoMLTabularTrainingJob(
            display_name=f"{self._temp_prefix}-train-housing-automl-{uuid.uuid4()}",
            optimization_prediction_type="regression",
            optimization_objective="minimize-rmse",
        )

        # Kick off both training jobs; the AutoML job will take approximately one hour to run

        custom_model = custom_job.run(
            ds,
            replica_count=1,
            model_display_name=f"{self._temp_prefix}-custom-housing-model-{uuid.uuid4()}",
            sync=False,
        )

        automl_model = automl_job.run(
            dataset=ds,
            target_column="median_house_value",
            model_display_name=f"{self._temp_prefix}-automl-housing-model-{uuid.uuid4()}",
            sync=False,
        )

        shared_state["resources"].extend(
            [automl_job, automl_model, custom_job, custom_model]
        )

        # Deploy both models after training completes
        custom_endpoint = custom_model.deploy(machine_type="n1-standard-4", sync=False)
        automl_endpoint = automl_model.deploy(machine_type="n1-standard-4", sync=False)
        shared_state["resources"].extend([automl_endpoint, custom_endpoint])

        # Send an online prediction with the same instance to both deployed models.
        # This sample is taken from an observation where median_house_value = 94600
        custom_endpoint.wait()
        custom_prediction = custom_endpoint.predict(
            [
                {
                    "longitude": -124.35,
                    "latitude": 40.54,
                    "housing_median_age": 52.0,
                    "total_rooms": 1820.0,
                    "total_bedrooms": 300.0,
                    "population": 806,
                    "households": 270.0,
                    "median_income": 3.014700,
                },
            ]
        )
        automl_endpoint.wait()
        automl_prediction = automl_endpoint.predict(
            [
                {
                    "longitude": "-124.35",
                    "latitude": "40.54",
                    "housing_median_age": "52.0",
                    "total_rooms": "1820.0",
                    "total_bedrooms": "300.0",
                    "population": "806",
                    "households": "270.0",
                    "median_income": "3.014700",
                },
            ]
        )

        # Ensure a single prediction was returned
        assert len(custom_prediction.predictions) == 1
        assert len(automl_prediction.predictions) == 1

        # Ensure the models are remotely accurate
        try:
            automl_result = automl_prediction.predictions[0]["value"]
            custom_result = custom_prediction.predictions[0][0]
            assert 200000 > automl_result > 50000
            assert 200000 > custom_result > 50000
        except KeyError as e:
            raise RuntimeError("Unexpected prediction response structure:", e)
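The two index expressions in the final block reflect the response shapes the test expects: one dict per instance with a `value` key from the AutoML regression model, and one raw output array per instance from the custom TensorFlow model. Roughly, with hypothetical numbers:

# Response shapes assumed by the assertions above (numbers are hypothetical)
automl_predictions = [{"value": 97234.5}]  # one dict per instance
custom_predictions = [[94321.7]]  # one output array per instance

assert 200000 > automl_predictions[0]["value"] > 50000
assert 200000 > custom_predictions[0][0] > 50000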