Skip to content

Commit

Permalink
feat: Added tabular forecasting samples (#128)
Browse files Browse the repository at this point in the history
* Added predict, get_model_evaluation and create_training_pipeline samples for AutoML Forecasting

* Added param handlers

* Added headers manually

* fix: Improved forecasting sample

* Added forecasting test

* Added tests for predict and get_model_evaluation

* fix: Fixed create_training_pipeline_sample

* feat: Added list_model_evaluations_tabular_forecasting_sample and test, fixed get_model_evaluation_tabular_forecasting_sample, and fixed create_training_pipeline_tabular_forecasting_sample

* fix: Reverted back to generated BUILD_SPECIFIC_GCLOUD_PROJECT

* fix: Fixed name of test

* fix: Fixed lint errors

* fix: Fixed assertion

* fix: Removed predict samples

* Consolidated samples

* fix: Removed list_model_evaluations_tabular_forecasting

* fix: tweaks

Co-authored-by: Ivan Cheung <ivanmkc@google.com>
  • Loading branch information
ivanmkc and Ivan Cheung committed Dec 22, 2020
1 parent 624a08d commit 69fc7fd
Show file tree
Hide file tree
Showing 5 changed files with 264 additions and 0 deletions.
@@ -0,0 +1,80 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


def make_parent(parent: str) -> str:
    """Return the fully-qualified parent resource name unchanged.

    Kept as a hook so generated samples can post-process the parent
    string in one place if needed.
    """
    return parent


def make_training_pipeline(
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
) -> google.cloud.aiplatform_v1alpha1.types.training_pipeline.TrainingPipeline:
    """Assemble the TrainingPipeline payload for an AutoML forecasting job."""
    # Columns used for training; "auto" lets the service choose each
    # column's transformation based on its inferred data type.
    trained_columns = [
        "date",
        "state_name",
        "county_fips_code",
        "confirmed_cases",
        "deaths",
    ]
    transformations = [
        {"auto": {"column_name": column}} for column in trained_columns
    ]

    # The inputs must be formatted according to the training_task_definition
    # yaml file referenced below.
    training_task_inputs_dict = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": transformations,
        # Forecast granularity: one data point per day.
        "period": {"unit": "day", "quantity": 1},
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    return {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        "training_task_inputs": to_protobuf_value(training_task_inputs_dict),
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80/10/10 train/validation/test split.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }

5 changes: 5 additions & 0 deletions .sample_configs/process_configs.yaml
Expand Up @@ -19,6 +19,7 @@ create_batch_prediction_job_custom_image_explain_sample: {}
create_batch_prediction_job_custom_tabular_explain_sample: {}
create_batch_prediction_job_sample: {}
create_batch_prediction_job_tabular_explain_sample: {}
create_batch_prediction_job_tabular_forecasting_sample: {}
create_batch_prediction_job_text_classification_sample: {}
create_batch_prediction_job_text_entity_extraction_sample: {}
create_batch_prediction_job_text_sentiment_analysis_sample: {}
Expand Down Expand Up @@ -77,6 +78,7 @@ create_training_pipeline_image_object_detection_sample:
training_task_inputs_dict: trainingjob.definition.AutoMlImageObjectDetectionInputs
create_training_pipeline_sample: {}
create_training_pipeline_tabular_classification_sample: {}
create_training_pipeline_tabular_forecasting_sample: {}
create_training_pipeline_tabular_regression_sample: {}
create_training_pipeline_text_classification_sample:
schema_types:
Expand Down Expand Up @@ -168,6 +170,7 @@ get_model_evaluation_sample:
- model_explanation
get_model_evaluation_slice_sample: {}
get_model_evaluation_tabular_classification_sample: {}
get_model_evaluation_tabular_forecasting_sample: {}
get_model_evaluation_tabular_regression_sample: {}
get_model_evaluation_text_classification_sample:
skip:
Expand Down Expand Up @@ -232,6 +235,7 @@ list_endpoints_sample: {}
list_hyperparameter_tuning_jobs_sample: {}
list_model_evaluation_slices_sample: {}
list_model_evaluations_sample: {}
list_model_evaluations_tabular_forecasting_sample: {}
list_models_sample: {}
list_specialist_pools_sample: {}
list_training_pipelines_sample: {}
Expand Down Expand Up @@ -274,6 +278,7 @@ predict_tabular_classification_sample:
comments:
predictions: See gs://google-cloud-aiplatform/schema/predict/prediction/tables_classification.yaml
for the format of the predictions.
predict_tabular_forecasting_sample: {}
predict_tabular_regression_sample:
api_endpoint: us-central1-prediction-aiplatform.googleapis.com
max_depth: 1
Expand Down
2 changes: 2 additions & 0 deletions .sample_configs/variants.yaml
Expand Up @@ -22,6 +22,7 @@ create_batch_prediction_job:
- custom_image_explain
- custom_tabular_explain
- tabular_explain
- tabular_forecasting
- text_classification
- text_entity_extraction
- text_sentiment_analysis
Expand Down Expand Up @@ -59,6 +60,7 @@ create_training_pipeline:
- image_classification
- image_object_detection
- tabular_classification
- tabular_forecasting
- tabular_regression
- text_classification
- text_entity_extraction
Expand Down
@@ -0,0 +1,90 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_create_training_pipeline_tabular_forecasting_sample]
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def create_training_pipeline_tabular_forecasting_sample(
    project: str,
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create an AutoML tabular forecasting training pipeline and print the response."""
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PipelineServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # Columns used for training; "auto" lets the service infer how each
    # column should be transformed from its data type.
    trained_columns = [
        "date",
        "state_name",
        "county_fips_code",
        "confirmed_cases",
        "deaths",
    ]
    transformations = [
        {"auto": {"column_name": column}} for column in trained_columns
    ]

    # The inputs must be formatted according to the training_task_definition
    # yaml file referenced below.
    training_task_inputs_dict = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": transformations,
        # Forecast granularity: one data point per day.
        "period": {"unit": "day", "quantity": 1},
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    training_pipeline = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        # The inputs are sent as a protobuf Value, built from the plain dict.
        "training_task_inputs": json_format.ParseDict(
            training_task_inputs_dict, Value()
        ),
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80/10/10 train/validation/test split.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }

    response = client.create_training_pipeline(
        parent=f"projects/{project}/locations/{location}",
        training_pipeline=training_pipeline,
    )
    print("response:", response)


# [END aiplatform_create_training_pipeline_tabular_forecasting_sample]
@@ -0,0 +1,87 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from uuid import uuid4

from google.cloud import aiplatform
import pytest

import cancel_training_pipeline_sample
import create_training_pipeline_tabular_forecasting_sample
import delete_training_pipeline_sample
import helpers

PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
DATASET_ID = "3003302817130610688" # COVID Dataset
DISPLAY_NAME = f"temp_create_training_pipeline_test_{uuid4()}"
TARGET_COLUMN = "deaths"
PREDICTION_TYPE = "forecasting"


@pytest.fixture
def shared_state():
    # Fresh dict shared between the test body and the teardown fixture,
    # used to pass the created pipeline's resource name to cleanup.
    yield {}


@pytest.fixture(scope="function", autouse=True)
def teardown(shared_state):
    """Cancel, wait for, and delete the training pipeline created by the test."""
    yield

    # If the test failed before it could record the pipeline name, there is
    # nothing to clean up; bail out instead of raising a KeyError here, which
    # would mask the test's original failure.
    training_pipeline_name = shared_state.get("training_pipeline_name")
    if training_pipeline_name is None:
        return

    training_pipeline_id = training_pipeline_name.split("/")[-1]

    # Stop the training pipeline
    cancel_training_pipeline_sample.cancel_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=training_pipeline_id
    )

    client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
    pipeline_client = aiplatform.gapic.PipelineServiceClient(
        client_options=client_options
    )

    # Waiting for training pipeline to be in CANCELLED state
    helpers.wait_for_job_state(
        get_job_method=pipeline_client.get_training_pipeline,
        name=training_pipeline_name,
    )

    # Delete the training pipeline
    delete_training_pipeline_sample.delete_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=training_pipeline_id
    )


def test_ucaip_generated_create_training_pipeline_sample(capsys, shared_state):
    """Run the forecasting sample and record the created pipeline's name."""
    sample = (
        create_training_pipeline_tabular_forecasting_sample
        .create_training_pipeline_tabular_forecasting_sample
    )
    sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset_id=DATASET_ID,
        model_display_name="permanent_tabular_forecasting_model",
        target_column=TARGET_COLUMN,
        time_series_identifier_column="county",
        time_column="date",
        static_columns=["state_name"],
        time_variant_past_only_columns=["deaths"],
        time_variant_past_and_future_columns=["date"],
        forecast_window_end=10,
    )

    # The sample prints the full create_training_pipeline response.
    out, _ = capsys.readouterr()
    assert "response:" in out

    # Save resource name of the newly created training pipeline so the
    # teardown fixture can cancel and delete it.
    shared_state["training_pipeline_name"] = helpers.get_name(out)

0 comments on commit 69fc7fd

Please sign in to comment.