forked from googleapis/python-aiplatform
-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_training_pipeline_tabular_forecasting_sample.py
90 lines (80 loc) · 3.54 KB
/
create_training_pipeline_tabular_forecasting_sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
def make_parent(parent: str) -> str:
    """Return ``parent`` unchanged.

    Args:
        parent: The value to pass through.

    Returns:
        The very same ``parent`` value that was supplied.
    """
    # The original re-assigned ``parent`` to itself before returning it;
    # that statement had no effect and has been dropped.
    return parent
def make_training_pipeline(
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
) -> "google.cloud.aiplatform_v1alpha1.types.training_pipeline.TrainingPipeline":
    """Assemble the training-pipeline payload for a tabular forecasting job.

    The return annotation is written as a string so that defining this
    function does not require ``google`` to be importable/bound in this
    module (annotations are otherwise evaluated at definition time).

    Args:
        display_name: Stored under ``display_name`` of the pipeline.
        dataset_id: Stored under ``input_data_config.dataset_id``.
        model_display_name: Stored under ``model_to_upload.display_name``.
        target_column: Stored as the ``targetColumn`` training input.
        time_series_identifier_column: Stored as the
            ``timeSeriesIdentifierColumn`` training input.
        time_column: Stored as the ``timeColumn`` training input.
        static_columns: Stored as ``staticColumns``.
            NOTE(review): annotated ``str`` but the key name suggests a
            collection of column names -- confirm against callers.
        time_variant_past_only_columns: Stored as
            ``timeVariantPastOnlyColumns`` (same caveat as above).
        time_variant_past_and_future_columns: Stored as
            ``timeVariantPastAndFutureColumns`` (same caveat as above).
        forecast_window_end: Stored as ``forecastWindowEnd``.

    Returns:
        A plain ``dict`` with the keys ``display_name``,
        ``training_task_definition``, ``training_task_inputs``,
        ``input_data_config`` and ``model_to_upload``.
    """
    # Set the columns used for training; "auto" leaves the choice of the
    # transformation for each column to the service.
    # NOTE(review): the column names are hard-coded for one specific dataset.
    transformations = [
        {"auto": {"column_name": "date"}},
        {"auto": {"column_name": "state_name"}},
        {"auto": {"column_name": "county_fips_code"}},
        {"auto": {"column_name": "confirmed_cases"}},
        {"auto": {"column_name": "deaths"}},
    ]

    period = {"unit": "day", "quantity": 1}

    training_task_inputs_dict = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": transformations,
        "period": period,
        # Objective function the model is to be optimized towards.
        # The training process creates a Model that optimizes the value of
        # the objective function over the validation set. The supported
        # optimization objectives:
        # "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE).
        # "minimize-mae" - Minimize mean-absolute error (MAE).
        # "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE).
        # "minimize-rmspe" - Minimize root-mean-squared percentage error
        #                    (RMSPE).
        # "minimize-wape-mae" - Minimize the combination of weighted absolute
        #                       percentage error (WAPE) and
        #                       mean-absolute-error (MAE).
        # "minimize-quantile-loss" - Minimize the quantile loss at the
        #                            defined quantiles.
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    # ``to_protobuf_value`` is not defined in this file; it is presumably
    # supplied by whatever harness consumes this handler -- TODO confirm.
    training_task_inputs = to_protobuf_value(training_task_inputs_dict)

    training_pipeline = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        "training_task_inputs": training_task_inputs,
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80% training / 10% validation / 10% test.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {
            "display_name": model_display_name,
        },
    }

    return training_pipeline