feat: AutoML Forecasting, Metadata Experiment Tracking, Tensorboard uploader
sasha-gitg committed May 14, 2021
2 parents cc1a708 + dcc459d commit e94c9db
Showing 35 changed files with 7,513 additions and 45 deletions.
17 changes: 17 additions & 0 deletions google/cloud/aiplatform/__init__.py
@@ -23,6 +23,7 @@
ImageDataset,
TabularDataset,
TextDataset,
TimeSeriesDataset,
VideoDataset,
)
from google.cloud.aiplatform.models import Endpoint
@@ -33,10 +34,12 @@
CustomContainerTrainingJob,
CustomPythonPackageTrainingJob,
AutoMLTabularTrainingJob,
AutoMLForecastingTrainingJob,
AutoMLImageTrainingJob,
AutoMLTextTrainingJob,
AutoMLVideoTrainingJob,
)
from google.cloud.aiplatform.metadata import metadata

"""
Usage:
@@ -46,12 +49,25 @@
"""
init = initializer.global_config.init

log_params = metadata.metadata_service.log_params
log_metrics = metadata.metadata_service.log_metrics
get_experiment_df = metadata.metadata_service.get_experiment_df
get_pipeline_df = metadata.metadata_service.get_pipeline_df
start_run = metadata.metadata_service.start_run


__all__ = (
"explain",
"gapic",
"init",
"log_params",
"log_metrics",
"get_experiment_df",
"get_pipeline_df",
"start_run",
"AutoMLImageTrainingJob",
"AutoMLTabularTrainingJob",
"AutoMLForecastingTrainingJob",
"AutoMLTextTrainingJob",
"AutoMLVideoTrainingJob",
"BatchPredictionJob",
@@ -63,5 +79,6 @@
"Model",
"TabularDataset",
"TextDataset",
"TimeSeriesDataset",
"VideoDataset",
)
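
These new top-level aliases put the experiment-tracking entry points directly on the aiplatform namespace. A minimal usage sketch, assuming init() accepts an experiment argument as part of this feature (the project, experiment, and run names here are hypothetical):

from google.cloud import aiplatform

# Hypothetical names; the `experiment=` argument to init() is an assumption
# tied to this metadata experiment-tracking feature.
aiplatform.init(
    project="my-project",
    location="us-central1",
    experiment="my-experiment",
)

aiplatform.start_run("run-1")                   # group subsequent logs under one run
aiplatform.log_params({"learning_rate": 0.01})  # record hyperparameters
aiplatform.log_metrics({"rmse": 0.34})          # record evaluation metrics

experiment_df = aiplatform.get_experiment_df()  # summarize runs as a DataFrame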
9 changes: 9 additions & 0 deletions google/cloud/aiplatform/compat/__init__.py
@@ -34,6 +34,8 @@
services.specialist_pool_service_client = (
services.specialist_pool_service_client_v1beta1
)
services.metadata_service_client = services.metadata_service_client_v1beta1
services.tensorboard_service_client = services.tensorboard_service_client_v1beta1

types.accelerator_type = types.accelerator_type_v1beta1
types.annotation = types.annotation_v1beta1
@@ -69,6 +71,13 @@
types.specialist_pool = types.specialist_pool_v1beta1
types.specialist_pool_service = types.specialist_pool_service_v1beta1
types.training_pipeline = types.training_pipeline_v1beta1
types.metadata_service = types.metadata_service_v1beta1
types.tensorboard_service = types.tensorboard_service_v1beta1
types.tensorboard_data = types.tensorboard_data_v1beta1
types.tensorboard_experiment = types.tensorboard_experiment_v1beta1
types.tensorboard_run = types.tensorboard_run_v1beta1
types.tensorboard_service = types.tensorboard_service_v1beta1
types.tensorboard_time_series = types.tensorboard_time_series_v1beta1

if DEFAULT_VERSION == V1:

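The aliasing above lets the rest of the SDK refer to one version-neutral client name while the Metadata and Tensorboard APIs remain in beta. A sketch of the consuming pattern, assuming the v1beta1 default is active:

from google.cloud.aiplatform import compat

# With the v1beta1 default, these resolve to the clients aliased above.
metadata_client_cls = compat.services.metadata_service_client
tensorboard_client_cls = compat.services.tensorboard_service_client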
8 changes: 8 additions & 0 deletions google/cloud/aiplatform/compat/services/__init__.py
@@ -36,6 +36,12 @@
from google.cloud.aiplatform_v1beta1.services.specialist_pool_service import (
client as specialist_pool_service_client_v1beta1,
)
from google.cloud.aiplatform_v1beta1.services.metadata_service import (
client as metadata_service_client_v1beta1,
)
from google.cloud.aiplatform_v1beta1.services.tensorboard_service import (
client as tensorboard_service_client_v1beta1,
)

from google.cloud.aiplatform_v1.services.dataset_service import (
client as dataset_service_client_v1,
@@ -76,4 +82,6 @@
pipeline_service_client_v1beta1,
prediction_service_client_v1beta1,
specialist_pool_service_client_v1beta1,
metadata_service_client_v1beta1,
tensorboard_service_client_v1beta1,
)
14 changes: 14 additions & 0 deletions google/cloud/aiplatform/compat/types/__init__.py
@@ -50,6 +50,13 @@
specialist_pool as specialist_pool_v1beta1,
specialist_pool_service as specialist_pool_service_v1beta1,
training_pipeline as training_pipeline_v1beta1,
metadata_service as metadata_service_v1beta1,
tensorboard_service as tensorboard_service_v1beta1,
tensorboard_data as tensorboard_data_v1beta1,
tensorboard_experiment as tensorboard_experiment_v1beta1,
tensorboard_run as tensorboard_run_v1beta1,
tensorboard_service as tensorboard_service_v1beta1,
tensorboard_time_series as tensorboard_time_series_v1beta1,
)
from google.cloud.aiplatform_v1.types import (
accelerator_type as accelerator_type_v1,
@@ -155,4 +162,11 @@
specialist_pool_v1beta1,
specialist_pool_service_v1beta1,
training_pipeline_v1beta1,
metadata_service_v1beta1,
tensorboard_service_v1beta1,
tensorboard_data_v1beta1,
tensorboard_experiment_v1beta1,
tensorboard_run_v1beta1,
tensorboard_service_v1beta1,
tensorboard_time_series_v1beta1,
)
2 changes: 2 additions & 0 deletions google/cloud/aiplatform/datasets/__init__.py
@@ -17,6 +17,7 @@

from google.cloud.aiplatform.datasets.dataset import _Dataset
from google.cloud.aiplatform.datasets.tabular_dataset import TabularDataset
from google.cloud.aiplatform.datasets.time_series_dataset import TimeSeriesDataset
from google.cloud.aiplatform.datasets.image_dataset import ImageDataset
from google.cloud.aiplatform.datasets.text_dataset import TextDataset
from google.cloud.aiplatform.datasets.video_dataset import VideoDataset
@@ -25,6 +26,7 @@
__all__ = (
"_Dataset",
"TabularDataset",
"TimeSeriesDataset",
"ImageDataset",
"TextDataset",
"VideoDataset",
5 changes: 5 additions & 0 deletions google/cloud/aiplatform/datasets/_datasources.py
@@ -225,6 +225,11 @@ def create_datasource(
raise ValueError("tabular dataset does not support data import.")
return TabularDatasource(gcs_source, bq_source)

if metadata_schema_uri == schema.dataset.metadata.time_series:
if import_schema_uri:
raise ValueError("time series dataset does not support data import.")
return TabularDatasource(gcs_source, bq_source)

if not import_schema_uri and not gcs_source:
return NonTabularDatasource()
elif import_schema_uri and gcs_source:
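Time series input shares the tabular formats, so the new branch reuses TabularDatasource and only rejects import_schema_uri. A small behavioral sketch, using the keyword parameters shown in this hunk (the BigQuery table name is hypothetical):

from google.cloud.aiplatform import schema
from google.cloud.aiplatform.datasets import _datasources

# GCS or BigQuery sources are wrapped in the same TabularDatasource
# used for tabular data; only the metadata schema differs.
datasource = _datasources.create_datasource(
    metadata_schema_uri=schema.dataset.metadata.time_series,
    bq_source="bq://my-project.my_dataset.my_table",
)

# Supplying an import schema raises, since time series datasets do not
# support post-creation data import.
try:
    _datasources.create_datasource(
        metadata_schema_uri=schema.dataset.metadata.time_series,
        import_schema_uri="gs://my-bucket/import_schema.yaml",
    )
except ValueError:
    pass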
4 changes: 2 additions & 2 deletions google/cloud/aiplatform/datasets/dataset.py
@@ -162,7 +162,7 @@ def create(
if their content bytes are identical (e.g. image bytes or
pdf bytes). These labels will be overridden by Annotation
labels specified inside the index file referenced by
[import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
``import_schema_uri``,
e.g. jsonl file.
project (str):
Project to upload this model to. Overrides project set in
@@ -449,7 +449,7 @@ def import_data(
if their content bytes are identical (e.g. image bytes or
pdf bytes). These labels will be overridden by Annotation
labels specified inside the index file referenced by
[import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
``import_schema_uri``,
e.g. jsonl file.
sync (bool):
Whether to execute this method synchronously. If False, this method
2 changes: 1 addition & 1 deletion google/cloud/aiplatform/datasets/image_dataset.py
@@ -82,7 +82,7 @@ def create(
if their content bytes are identical (e.g. image bytes or
pdf bytes). These labels will be overridden by Annotation
labels specified inside the index file referenced by
[import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
``import_schema_uri``,
e.g. jsonl file.
project (str):
Project to upload this model to. Overrides project set in
2 changes: 1 addition & 1 deletion google/cloud/aiplatform/datasets/text_dataset.py
@@ -89,7 +89,7 @@ def create(
if their content bytes are identical (e.g. image bytes or
pdf bytes). These labels will be overridden by Annotation
labels specified inside the index file referenced by
[import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
``import_schema_uri``,
e.g. jsonl file.
project (str):
Project to upload this model to. Overrides project set in
134 changes: 134 additions & 0 deletions google/cloud/aiplatform/datasets/time_series_dataset.py
@@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional, Sequence, Tuple, Union

from google.auth import credentials as auth_credentials

from google.cloud.aiplatform import datasets
from google.cloud.aiplatform.datasets import _datasources
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform import schema
from google.cloud.aiplatform import utils


class TimeSeriesDataset(datasets._Dataset):
"""Managed time series dataset resource for AI Platform"""

_supported_metadata_schema_uris: Optional[Tuple[str]] = (
schema.dataset.metadata.time_series,
)

@classmethod
def create(
cls,
display_name: str,
gcs_source: Optional[Union[str, Sequence[str]]] = None,
bq_source: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
encryption_spec_key_name: Optional[str] = None,
sync: bool = True,
) -> "TimeSeriesDataset":
"""Creates a new tabular dataset.
Args:
display_name (str):
Required. The user-defined name of the Dataset.
The name can be up to 128 characters long and can consist
of any UTF-8 characters.
gcs_source (Union[str, Sequence[str]]):
Google Cloud Storage URI(-s) to the
input file(s). May contain wildcards. For more
information on wildcards, see
https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
examples:
str: "gs://bucket/file.csv"
Sequence[str]: ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]
bq_source (str):
BigQuery URI to the input table.
example:
"bq://project.dataset.table_name"
project (str):
Project to upload this model to. Overrides project set in
aiplatform.init.
location (str):
Location to upload this model to. Overrides location set in
aiplatform.init.
credentials (auth_credentials.Credentials):
Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
request_metadata (Sequence[Tuple[str, str]]):
Strings which should be sent along with the request as metadata.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the dataset. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Dataset and all sub-resources of this Dataset will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
Returns:
time_series_dataset (TimeSeriesDataset):
Instantiated representation of the managed time series dataset resource.
"""

utils.validate_display_name(display_name)

api_client = cls._instantiate_client(location=location, credentials=credentials)

metadata_schema_uri = schema.dataset.metadata.time_series

datasource = _datasources.create_datasource(
metadata_schema_uri=metadata_schema_uri,
gcs_source=gcs_source,
bq_source=bq_source,
)

return cls._create_and_import(
api_client=api_client,
parent=initializer.global_config.common_location_path(
project=project, location=location
),
display_name=display_name,
metadata_schema_uri=metadata_schema_uri,
datasource=datasource,
project=project or initializer.global_config.project,
location=location or initializer.global_config.location,
credentials=credentials or initializer.global_config.credentials,
request_metadata=request_metadata,
encryption_spec=initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name
),
sync=sync,
)

def import_data(self):
raise NotImplementedError(
f"{self.__class__.__name__} class does not support 'import_data'"
)
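
End to end, the new dataset class feeds the AutoMLForecastingTrainingJob exported above. A hedged sketch: resource names are hypothetical, and the run() parameters follow the released SDK rather than anything shown in this diff:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

# Create the managed time series dataset from a BigQuery table.
dataset = aiplatform.TimeSeriesDataset.create(
    display_name="sales-history",
    bq_source="bq://my-project.my_dataset.sales",
)

# Parameter names below are assumptions based on the released SDK.
job = aiplatform.AutoMLForecastingTrainingJob(
    display_name="sales-forecast",
    optimization_objective="minimize-rmse",
)
model = job.run(
    dataset=dataset,
    target_column="sales",
    time_column="date",
    time_series_identifier_column="store_id",
    available_at_forecast_columns=["date"],
    unavailable_at_forecast_columns=["sales"],
    forecast_horizon=30,
    data_granularity_unit="day",
    data_granularity_count=1,
)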
2 changes: 1 addition & 1 deletion google/cloud/aiplatform/datasets/video_dataset.py
@@ -82,7 +82,7 @@ def create(
if their content bytes are identical (e.g. image bytes or
pdf bytes). These labels will be overridden by Annotation
labels specified inside the index file referenced by
[import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
``import_schema_uri``,
e.g. jsonl file.
project (str):
Project to upload this model to. Overrides project set in
