# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


def make_parent(parent: str) -> str:
    """Return the parent resource name unchanged (identity pass-through hook)."""
    return parent


# The return annotation is quoted (a forward reference) because this module
# does not import ``google.cloud``; an unquoted annotation would be evaluated
# when the ``def`` statement runs and raise NameError at import time.
def make_training_pipeline(
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
) -> "google.cloud.aiplatform_v1alpha1.types.training_pipeline.TrainingPipeline":
    """Build a TrainingPipeline request body for AutoML tabular forecasting.

    Args:
        display_name: Display name for the new training pipeline.
        dataset_id: ID of the tabular dataset to train on.
        model_display_name: Display name for the model that will be uploaded.
        target_column: Column to forecast.
        time_series_identifier_column: Column identifying each time series.
        time_column: Column holding the timestamp of each row.
        static_columns: Columns whose values do not change over time.
        time_variant_past_only_columns: Columns known only for the past.
        time_variant_past_and_future_columns: Columns known for both past and
            future points in time.
        forecast_window_end: End of the forecast window, in ``period`` units.

    Returns:
        A dict shaped like a TrainingPipeline proto message, ready to be
        passed to PipelineServiceClient.create_training_pipeline.
    """
    # Set the columns used for training and their data types.
    transformations = [
        {"auto": {"column_name": "date"}},
        {"auto": {"column_name": "state_name"}},
        {"auto": {"column_name": "county_fips_code"}},
        {"auto": {"column_name": "confirmed_cases"}},
        {"auto": {"column_name": "deaths"}},
    ]

    period = {"unit": "day", "quantity": 1}

    # The inputs must be formatted according to the training_task_definition
    # yaml schema referenced below.
    training_task_inputs_dict = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": transformations,
        "period": period,
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    # NOTE(review): ``to_protobuf_value`` is not defined in this file —
    # presumably injected by the sample-generation harness. Confirm it wraps
    # the dict in a google.protobuf.Value (cf. json_format.ParseDict in the
    # generated snippet).
    training_task_inputs = to_protobuf_value(training_task_inputs_dict)

    training_pipeline = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        "training_task_inputs": training_task_inputs,
        "input_data_config": {
            "dataset_id": dataset_id,
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }

    return training_pipeline
create_training_pipeline_tabular_regression_sample: {} create_training_pipeline_text_classification_sample: {} create_training_pipeline_text_entity_extraction_sample: @@ -182,6 +184,7 @@ get_model_evaluation_sample: - model_explanation get_model_evaluation_slice_sample: {} get_model_evaluation_tabular_classification_sample: {} +get_model_evaluation_tabular_forecasting_sample: {} get_model_evaluation_tabular_regression_sample: {} get_model_evaluation_text_classification_sample: skip: @@ -246,6 +249,7 @@ list_endpoints_sample: {} list_hyperparameter_tuning_jobs_sample: {} list_model_evaluation_slices_sample: {} list_model_evaluations_sample: {} +list_model_evaluations_tabular_forecasting_sample: {} list_models_sample: {} list_specialist_pools_sample: {} list_training_pipelines_sample: {} @@ -288,6 +292,7 @@ predict_tabular_classification_sample: comments: predictions: See gs://google-cloud-aiplatform/schema/predict/prediction/tables_classification.yaml for the format of the predictions. +predict_tabular_forecasting_sample: {} predict_tabular_regression_sample: api_endpoint: us-central1-prediction-aiplatform.googleapis.com max_depth: 1 diff --git a/.sample_configs/variants.yaml b/.sample_configs/variants.yaml index 59a0fd78f5..0ef9cf7bc7 100644 --- a/.sample_configs/variants.yaml +++ b/.sample_configs/variants.yaml @@ -22,6 +22,7 @@ create_batch_prediction_job: - custom_image_explain - custom_tabular_explain - tabular_explain +- tabular_forecasting - text_classification - text_entity_extraction - text_sentiment_analysis @@ -59,6 +60,7 @@ create_training_pipeline: - image_classification - image_object_detection - tabular_classification +- tabular_forecasting - tabular_regression - text_classification - text_entity_extraction diff --git a/samples/snippets/create_training_pipeline_tabular_forecasting_sample.py b/samples/snippets/create_training_pipeline_tabular_forecasting_sample.py new file mode 100644 index 0000000000..5c04fccc79 --- /dev/null +++ 
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_create_training_pipeline_tabular_forecasting_sample]
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def create_training_pipeline_tabular_forecasting_sample(
    project: str,
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create an AutoML tabular forecasting training pipeline.

    Builds the training-task inputs from the given column roles, submits a
    TrainingPipeline to the Vertex AI Pipeline service, and prints the
    service response.

    Args:
        project: GCP project ID.
        display_name: Display name for the new training pipeline.
        dataset_id: ID of the tabular dataset to train on.
        model_display_name: Display name for the model that will be uploaded.
        target_column: Column to forecast.
        time_series_identifier_column: Column identifying each time series.
        time_column: Column holding the timestamp of each row.
        static_columns: Columns whose values do not change over time.
        time_variant_past_only_columns: Columns known only for the past.
        time_variant_past_and_future_columns: Columns known for past & future.
        forecast_window_end: End of the forecast window, in period units.
        location: Region in which to run the pipeline.
        api_endpoint: Regional API endpoint matching ``location``.

    Returns:
        The created TrainingPipeline response message.
    """
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PipelineServiceClient(client_options=client_options)
    # set the columns used for training and their data types
    transformations = [
        {"auto": {"column_name": "date"}},
        {"auto": {"column_name": "state_name"}},
        {"auto": {"column_name": "county_fips_code"}},
        {"auto": {"column_name": "confirmed_cases"}},
        {"auto": {"column_name": "deaths"}},
    ]

    period = {"unit": "day", "quantity": 1}

    # the inputs should be formatted according to the training_task_definition yaml file
    training_task_inputs_dict = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": transformations,
        "period": period,
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    # Convert the plain dict into a google.protobuf.Value as required by the
    # TrainingPipeline.training_task_inputs field.
    training_task_inputs = json_format.ParseDict(training_task_inputs_dict, Value())

    training_pipeline = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        "training_task_inputs": training_task_inputs,
        "input_data_config": {
            "dataset_id": dataset_id,
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }
    parent = f"projects/{project}/locations/{location}"
    response = client.create_training_pipeline(
        parent=parent, training_pipeline=training_pipeline
    )
    print("response:", response)
    # Returning the response (in addition to printing it) lets callers and
    # tests read the created pipeline's resource name without scraping stdout.
    return response


# [END aiplatform_create_training_pipeline_tabular_forecasting_sample]
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from uuid import uuid4

from google.cloud import aiplatform
import pytest

import cancel_training_pipeline_sample
import create_training_pipeline_tabular_forecasting_sample
import delete_training_pipeline_sample
import helpers

PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
DATASET_ID = "3003302817130610688"  # COVID Dataset
DISPLAY_NAME = f"temp_create_training_pipeline_test_{uuid4()}"
TARGET_COLUMN = "deaths"
PREDICTION_TYPE = "forecasting"


@pytest.fixture
def shared_state():
    # Mutable dict shared between the test body and the teardown fixture.
    state = {}
    yield state


@pytest.fixture(scope="function", autouse=True)
def teardown(shared_state):
    """Cancel, await, and delete the training pipeline created by the test."""
    yield

    # If the test failed before a pipeline was created there is nothing to
    # clean up; without this guard the KeyError below would mask the real
    # test failure.
    if "training_pipeline_name" not in shared_state:
        return

    training_pipeline_id = shared_state["training_pipeline_name"].split("/")[-1]

    # Stop the training pipeline
    cancel_training_pipeline_sample.cancel_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=training_pipeline_id
    )

    client_options = {"api_endpoint": "us-central1-aiplatform.googleapis.com"}
    pipeline_client = aiplatform.gapic.PipelineServiceClient(
        client_options=client_options
    )

    # Waiting for training pipeline to be in CANCELLED state.
    # NOTE(review): no explicit target state is passed — presumably
    # helpers.wait_for_job_state defaults to waiting for a terminal state;
    # confirm against helpers.py.
    helpers.wait_for_job_state(
        get_job_method=pipeline_client.get_training_pipeline,
        name=shared_state["training_pipeline_name"],
    )

    # Delete the training pipeline
    delete_training_pipeline_sample.delete_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=training_pipeline_id
    )


def test_ucaip_generated_create_training_pipeline_sample(capsys, shared_state):
    """Create a forecasting training pipeline and record its resource name."""
    create_training_pipeline_tabular_forecasting_sample.create_training_pipeline_tabular_forecasting_sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset_id=DATASET_ID,
        model_display_name="permanent_tabular_forecasting_model",
        target_column=TARGET_COLUMN,
        time_series_identifier_column="county",
        time_column="date",
        static_columns=["state_name"],
        time_variant_past_only_columns=["deaths"],
        time_variant_past_and_future_columns=["date"],
        forecast_window_end=10,
    )

    out, _ = capsys.readouterr()
    assert "response:" in out

    # Save resource name of the newly created training pipeline so the
    # teardown fixture can cancel and delete it.
    shared_state["training_pipeline_name"] = helpers.get_name(out)