New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Added tabular forecasting samples #128
Changes from all commits
d745036
0fee278
57318ce
a6ac96d
42fcc42
e75b880
7e8f7ba
848ee55
669a664
a779c4f
b08ac1e
fd87673
2fc3f90
087ff39
ddd443e
f12db5b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
|
||
def make_parent(parent: str) -> str:
    """Return the parent resource name for the request.

    This helper exists so the sample-generation tooling has a single place
    to format the parent path; the value is currently passed through as-is.

    Args:
        parent: Fully-qualified location path, e.g.
            ``projects/{project}/locations/{location}``.

    Returns:
        The parent resource name, unchanged.
    """
    # The original body contained a no-op self-assignment (`parent = parent`);
    # simply return the value.
    return parent
|
||
|
||
def make_training_pipeline(
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
) -> google.cloud.aiplatform_v1alpha1.types.training_pipeline.TrainingPipeline:
    """Assemble the TrainingPipeline spec for an AutoML forecasting job.

    The training-task inputs are serialized to a protobuf ``Value`` so they
    match the schema referenced by ``training_task_definition``.
    """
    # Columns used for training; "auto" lets the service infer each type.
    column_transformations = [
        {"auto": {"column_name": column}}
        for column in (
            "date",
            "state_name",
            "county_fips_code",
            "confirmed_cases",
            "deaths",
        )
    ]

    # Forecast granularity: one step per day.
    forecast_period = {"unit": "day", "quantity": 1}

    # These keys must follow the training_task_definition YAML schema.
    task_inputs = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": column_transformations,
        "period": forecast_period,
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    return {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        "training_task_inputs": to_protobuf_value(task_inputs),
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80/10/10 train/validation/test split.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ create_batch_prediction_job_custom_image_explain_sample: {} | |
create_batch_prediction_job_custom_tabular_explain_sample: {} | ||
create_batch_prediction_job_sample: {} | ||
create_batch_prediction_job_tabular_explain_sample: {} | ||
create_batch_prediction_job_tabular_forecasting_sample: {} | ||
create_batch_prediction_job_text_classification_sample: {} | ||
create_batch_prediction_job_text_entity_extraction_sample: {} | ||
create_batch_prediction_job_text_sentiment_analysis_sample: {} | ||
|
@@ -73,6 +74,7 @@ create_training_pipeline_image_classification_sample: {} | |
create_training_pipeline_image_object_detection_sample: {} | ||
create_training_pipeline_sample: {} | ||
create_training_pipeline_tabular_classification_sample: {} | ||
create_training_pipeline_tabular_forecasting_sample: {} | ||
create_training_pipeline_tabular_regression_sample: {} | ||
create_training_pipeline_text_classification_sample: {} | ||
create_training_pipeline_text_entity_extraction_sample: | ||
|
@@ -182,6 +184,7 @@ get_model_evaluation_sample: | |
- model_explanation | ||
get_model_evaluation_slice_sample: {} | ||
get_model_evaluation_tabular_classification_sample: {} | ||
get_model_evaluation_tabular_forecasting_sample: {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I got it, we want to consolidate if possible. Let me do another pass. |
||
get_model_evaluation_tabular_regression_sample: {} | ||
get_model_evaluation_text_classification_sample: | ||
skip: | ||
|
@@ -246,6 +249,7 @@ list_endpoints_sample: {} | |
list_hyperparameter_tuning_jobs_sample: {} | ||
list_model_evaluation_slices_sample: {} | ||
list_model_evaluations_sample: {} | ||
list_model_evaluations_tabular_forecasting_sample: {} | ||
list_models_sample: {} | ||
list_specialist_pools_sample: {} | ||
list_training_pipelines_sample: {} | ||
|
@@ -288,6 +292,7 @@ predict_tabular_classification_sample: | |
comments: | ||
predictions: See gs://google-cloud-aiplatform/schema/predict/prediction/tables_classification.yaml | ||
for the format of the predictions. | ||
predict_tabular_forecasting_sample: {} | ||
predict_tabular_regression_sample: | ||
api_endpoint: us-central1-prediction-aiplatform.googleapis.com | ||
max_depth: 1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# [START aiplatform_create_training_pipeline_tabular_forecasting_sample] | ||
from google.cloud import aiplatform | ||
from google.protobuf import json_format | ||
from google.protobuf.struct_pb2 import Value | ||
|
||
|
||
def create_training_pipeline_tabular_forecasting_sample(
    project: str,
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create an AutoML tabular-forecasting training pipeline and print it."""
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PipelineServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # Columns used for training; "auto" lets the service infer each type.
    column_transformations = [
        {"auto": {"column_name": column}}
        for column in (
            "date",
            "state_name",
            "county_fips_code",
            "confirmed_cases",
            "deaths",
        )
    ]

    # These keys must follow the training_task_definition YAML schema.
    task_inputs = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": column_transformations,
        # Forecast granularity: one step per day.
        "period": {"unit": "day", "quantity": 1},
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    training_pipeline = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        # Serialize the dict into a protobuf Value as the API expects.
        "training_task_inputs": json_format.ParseDict(task_inputs, Value()),
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80/10/10 train/validation/test split.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }

    parent = f"projects/{project}/locations/{location}"
    response = client.create_training_pipeline(
        parent=parent, training_pipeline=training_pipeline
    )
    print("response:", response)
|
||
|
||
# [END aiplatform_create_training_pipeline_tabular_forecasting_sample] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# Copyright 2020 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
from uuid import uuid4 | ||
|
||
from google.cloud import aiplatform | ||
import pytest | ||
|
||
import cancel_training_pipeline_sample | ||
import create_training_pipeline_tabular_forecasting_sample | ||
import delete_training_pipeline_sample | ||
import helpers | ||
|
||
PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") | ||
DATASET_ID = "3003302817130610688" # COVID Dataset | ||
DISPLAY_NAME = f"temp_create_training_pipeline_test_{uuid4()}" | ||
TARGET_COLUMN = "deaths" | ||
PREDICTION_TYPE = "forecasting" | ||
|
||
|
||
@pytest.fixture
def shared_state():
    """Provide a dict shared between the test body and the teardown fixture."""
    yield {}
|
||
|
||
@pytest.fixture(scope="function", autouse=True)
def teardown(shared_state):
    """Cancel, await cancellation of, and delete the pipeline after each test."""
    yield

    pipeline_name = shared_state["training_pipeline_name"]
    pipeline_id = pipeline_name.split("/")[-1]

    # Request cancellation of the pipeline created by the test.
    cancel_training_pipeline_sample.cancel_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=pipeline_id
    )

    pipeline_client = aiplatform.gapic.PipelineServiceClient(
        client_options={"api_endpoint": "us-central1-aiplatform.googleapis.com"}
    )

    # Block until the pipeline reaches the CANCELLED state.
    helpers.wait_for_job_state(
        get_job_method=pipeline_client.get_training_pipeline,
        name=pipeline_name,
    )

    # Remove the (now cancelled) pipeline resource.
    delete_training_pipeline_sample.delete_training_pipeline_sample(
        project=PROJECT_ID, training_pipeline_id=pipeline_id
    )
|
||
|
||
def test_ucaip_generated_create_training_pipeline_sample(capsys, shared_state):
    """Smoke-test the forecasting sample and record the pipeline it creates."""
    sample = (
        create_training_pipeline_tabular_forecasting_sample
        .create_training_pipeline_tabular_forecasting_sample
    )
    sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset_id=DATASET_ID,
        model_display_name="permanent_tabular_forecasting_model",
        target_column=TARGET_COLUMN,
        time_series_identifier_column="county",
        time_column="date",
        static_columns=["state_name"],
        time_variant_past_only_columns=["deaths"],
        time_variant_past_and_future_columns=["date"],
        forecast_window_end=10,
    )

    captured, _ = capsys.readouterr()
    assert "response:" in captured

    # Save resource name of the newly created training pipeline so the
    # teardown fixture can cancel and delete it.
    shared_state["training_pipeline_name"] = helpers.get_name(captured)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Curious as to if we should show comments for each of these values in the sample.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@ivanmkc do you mean comments like # display_name: YOUR_DISPLAY_NAME?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for reviewing, I was wondering if we should add comments for each param in the samples.
Seems like Yuhan is suggesting to just tell them to read the docstrings.