diff --git a/google/cloud/aiplatform/datasets/dataset.py b/google/cloud/aiplatform/datasets/dataset.py index c41b252869..5e5de0058b 100644 --- a/google/cloud/aiplatform/datasets/dataset.py +++ b/google/cloud/aiplatform/datasets/dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union, List +from typing import Dict, List, Optional, Sequence, Tuple, Union from google.api_core import operation from google.auth import credentials as auth_credentials @@ -115,6 +115,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "_Dataset": @@ -176,6 +177,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. 
Has the @@ -198,6 +209,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -221,6 +234,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), @@ -240,6 +254,7 @@ def _create_and_import( location: str, credentials: Optional[auth_credentials.Credentials], request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None, sync: bool = True, ) -> "_Dataset": @@ -277,6 +292,16 @@ def _create_and_import( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec (Optional[gca_encryption_spec.EncryptionSpec]): Optional. The Cloud KMS customer managed encryption key used to protect the dataset. 
The key needs to be in the same region as where the compute @@ -300,6 +325,7 @@ def _create_and_import( metadata_schema_uri=metadata_schema_uri, datasource=datasource, request_metadata=request_metadata, + labels=labels, encryption_spec=encryption_spec, ) @@ -346,6 +372,7 @@ def _create( metadata_schema_uri: str, datasource: _datasources.Datasource, request_metadata: Sequence[Tuple[str, str]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None, ) -> operation.Operation: """Creates a new managed dataset by directly calling API client. @@ -373,6 +400,16 @@ def _create( request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the create_dataset request as metadata. Usually to specify special dataset config. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec (Optional[gca_encryption_spec.EncryptionSpec]): Optional. The Cloud KMS customer managed encryption key used to protect the dataset. 
The key needs to be in the same region as where the compute @@ -388,6 +425,7 @@ def _create( display_name=display_name, metadata_schema_uri=metadata_schema_uri, metadata=datasource.dataset_metadata, + labels=labels, encryption_spec=encryption_spec, ) diff --git a/google/cloud/aiplatform/datasets/image_dataset.py b/google/cloud/aiplatform/datasets/image_dataset.py index 506338c915..bebc75beab 100644 --- a/google/cloud/aiplatform/datasets/image_dataset.py +++ b/google/cloud/aiplatform/datasets/image_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -44,6 +44,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "ImageDataset": @@ -95,6 +96,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. 
Has the @@ -117,6 +128,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -141,6 +154,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/tabular_dataset.py b/google/cloud/aiplatform/datasets/tabular_dataset.py index f9a9658d7e..741a2cc643 100644 --- a/google/cloud/aiplatform/datasets/tabular_dataset.py +++ b/google/cloud/aiplatform/datasets/tabular_dataset.py @@ -18,7 +18,7 @@ import csv import logging -from typing import List, Optional, Sequence, Set, Tuple, Union +from typing import Dict, List, Optional, Sequence, Set, Tuple, Union from google.auth import credentials as auth_credentials @@ -269,6 +269,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "TabularDataset": @@ -302,6 +303,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). 
+ See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the @@ -324,6 +335,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -347,6 +360,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/text_dataset.py b/google/cloud/aiplatform/datasets/text_dataset.py index 85676ed2ed..140fd17335 100644 --- a/google/cloud/aiplatform/datasets/text_dataset.py +++ b/google/cloud/aiplatform/datasets/text_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -44,6 +44,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "TextDataset": @@ -102,6 +103,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. 
+ Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the @@ -124,6 +135,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -148,6 +161,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/time_series_dataset.py b/google/cloud/aiplatform/datasets/time_series_dataset.py index 1a5d62bb39..5bad36b896 100644 --- a/google/cloud/aiplatform/datasets/time_series_dataset.py +++ b/google/cloud/aiplatform/datasets/time_series_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. 
# -from typing import Optional, Sequence, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -43,6 +43,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "TimeSeriesDataset": @@ -76,6 +77,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. 
Has the @@ -99,6 +110,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -122,6 +135,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/video_dataset.py b/google/cloud/aiplatform/datasets/video_dataset.py index 594a4ac407..2964b77f19 100644 --- a/google/cloud/aiplatform/datasets/video_dataset.py +++ b/google/cloud/aiplatform/datasets/video_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -44,6 +44,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "VideoDataset": @@ -95,6 +96,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Datasets. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Dataset + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. 
+ System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the @@ -117,6 +128,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -141,6 +154,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 66b0479ced..720aa46b21 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -370,7 +370,7 @@ def create( explanation_parameters: Optional[ "aiplatform.explain.ExplanationParameters" ] = None, - labels: Optional[dict] = None, + labels: Optional[Dict[str, str]] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, @@ -499,8 +499,8 @@ def create( a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see `Ref docs ` - labels (Optional[dict]): - The labels with user-defined metadata to organize your + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to organize your BatchPredictionJobs. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. 
@@ -533,6 +533,8 @@ def create( """ utils.validate_display_name(job_display_name) + if labels: + utils.validate_labels(labels) model_name = utils.full_resource_name( resource_name=model_name, @@ -935,6 +937,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, ): @@ -960,7 +963,8 @@ def __init__( my_job = aiplatform.CustomJob( display_name='my_job', - worker_pool_specs=worker_pool_specs + worker_pool_specs=worker_pool_specs, + labels={'my_key': 'my_value'}, ) my_job.run() @@ -989,6 +993,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional.Custom credentials to use to run call custom job service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize CustomJobs. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (str): Optional.Customer-managed encryption key name for a CustomJob. 
If this is set, then all resources @@ -1013,6 +1027,9 @@ def __init__( "should be set using aiplatform.init(staging_bucket='gs://my-bucket')" ) + if labels: + utils.validate_labels(labels) + # default directory if not given base_output_dir = base_output_dir or utils._timestamped_gcs_dir( staging_bucket, "aiplatform-custom-job" @@ -1026,6 +1043,7 @@ def __init__( output_uri_prefix=base_output_dir ), ), + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), @@ -1063,6 +1081,7 @@ def from_local_script( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, ) -> "CustomJob": @@ -1078,6 +1097,7 @@ def from_local_script( replica_count=1, args=['--dataset', 'gs://my-bucket/my-dataset', '--model_output_uri', 'gs://my-bucket/model'] + labels={'my_key': 'my_value'}, ) job.run() @@ -1126,6 +1146,16 @@ def from_local_script( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call custom job service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize CustomJobs. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (str): Optional. Customer-managed encryption key name for a CustomJob. 
If this is set, then all resources @@ -1150,6 +1180,9 @@ def from_local_script( "should be set using aiplatform.init(staging_bucket='gs://my-bucket')" ) + if labels: + utils.validate_labels(labels) + worker_pool_specs = worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=replica_count, machine_type=machine_type, @@ -1188,6 +1221,7 @@ def from_local_script( project=project, location=location, credentials=credentials, + labels=labels, encryption_spec_key_name=encryption_spec_key_name, staging_bucket=staging_bucket, ) @@ -1325,6 +1359,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, ): """ @@ -1353,7 +1388,8 @@ def __init__( custom_job = aiplatform.CustomJob( display_name='my_job', - worker_pool_specs=worker_pool_specs + worker_pool_specs=worker_pool_specs, + labels={'my_key': 'my_value'}, ) @@ -1371,6 +1407,7 @@ def __init__( }, max_trial_count=128, parallel_trial_count=8, + labels={'my_key': 'my_value'}, ) hp_job.run() @@ -1466,6 +1503,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call HyperparameterTuning service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize HyperparameterTuningJobs. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (str): Optional. Customer-managed encryption key options for a HyperparameterTuningJob. 
If this is set, then @@ -1503,6 +1550,7 @@ def __init__( parallel_trial_count=parallel_trial_count, max_failed_trial_count=max_failed_trial_count, trial_job_spec=copy.deepcopy(custom_job.job_spec), + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 9c53ff5a2d..c1518ce89d 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -154,7 +154,7 @@ def create( cls, display_name: str, description: Optional[str] = None, - labels: Optional[Dict] = None, + labels: Optional[Dict[str, str]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), project: Optional[str] = None, location: Optional[str] = None, @@ -177,7 +177,7 @@ def create( set in aiplatform.init will be used. description (str): Optional. The description of the Endpoint. - labels (Dict): + labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 @@ -216,6 +216,8 @@ def create( api_client = cls._instantiate_client(location=location, credentials=credentials) utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) project = project or initializer.global_config.project location = location or initializer.global_config.location @@ -244,7 +246,7 @@ def _create( project: str, location: str, description: Optional[str] = None, - labels: Optional[Dict] = None, + labels: Optional[Dict[str, str]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), credentials: Optional[auth_credentials.Credentials] = None, encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None, @@ -268,7 +270,7 @@ def _create( set in aiplatform.init will be used. description (str): Optional. The description of the Endpoint. - labels (Dict): + labels (Dict[str, str]): Optional. 
The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 @@ -1470,6 +1472,7 @@ def upload( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync=True, ) -> "Model": @@ -1593,6 +1596,16 @@ def upload( credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the @@ -1611,6 +1624,8 @@ def upload( is specified. 
""" utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) if bool(explanation_metadata) != bool(explanation_parameters): raise ValueError( @@ -1667,6 +1682,7 @@ def upload( description=description, container_spec=container_spec, predict_schemata=model_predict_schemata, + labels=labels, encryption_spec=encryption_spec, ) @@ -1991,7 +2007,7 @@ def batch_predict( generate_explanation: Optional[bool] = False, explanation_metadata: Optional[explain.ExplanationMetadata] = None, explanation_parameters: Optional[explain.ExplanationParameters] = None, - labels: Optional[dict] = None, + labels: Optional[Dict[str, str]] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, @@ -2126,7 +2142,7 @@ def batch_predict( a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see `Ref docs ` - labels: Optional[dict] = None + labels: Optional[Dict[str, str]] = None Optional. The labels with user-defined metadata to organize your BatchPredictionJobs. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase diff --git a/google/cloud/aiplatform/pipeline_jobs.py b/google/cloud/aiplatform/pipeline_jobs.py index 29a31a3ced..393f61c965 100644 --- a/google/cloud/aiplatform/pipeline_jobs.py +++ b/google/cloud/aiplatform/pipeline_jobs.py @@ -161,12 +161,7 @@ def __init__( utils.validate_display_name(display_name) if labels: - for k, v in labels.items(): - if not isinstance(k, str) or not isinstance(v, str): - raise ValueError( - "Expect labels to be a mapping of string key value pairs. 
" - 'Got "{}".'.format(labels) - ) + utils.validate_labels(labels) super().__init__(project=project, location=location, credentials=credentials) diff --git a/google/cloud/aiplatform/tensorboard/tensorboard.py b/google/cloud/aiplatform/tensorboard/tensorboard.py index 1b561a6557..3fe6507968 100644 --- a/google/cloud/aiplatform/tensorboard/tensorboard.py +++ b/google/cloud/aiplatform/tensorboard/tensorboard.py @@ -146,6 +146,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -245,6 +247,7 @@ def update( update_mask.append("description") if labels: + utils.validate_labels(labels) update_mask.append("labels") encryption_spec = None diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 8e89509246..db7db10f2f 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -80,6 +80,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -96,6 +97,16 @@ def __init__( aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional credentials to use to retrieve the model. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. 
The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -123,9 +134,12 @@ def __init__( Overrides encryption_spec_key_name set in aiplatform.init. """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) super().__init__(project=project, location=location, credentials=credentials) self._display_name = display_name + self._labels = labels self._training_encryption_spec = initializer.global_config.get_encryption_spec( encryption_spec_key_name=training_encryption_spec_key_name ) @@ -581,6 +595,7 @@ def _run_job( training_task_inputs=training_task_inputs, model_to_upload=model, input_data_config=input_data_config, + labels=self._labels, encryption_spec=self._training_encryption_spec, ) @@ -881,6 +896,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -985,6 +1001,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -1019,6 +1045,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -1107,6 +1134,7 @@ def network(self) -> Optional[str]: def _prepare_and_validate_run( self, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, replica_count: int = 1, machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", @@ -1122,6 +1150,16 @@ def _prepare_and_validate_run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. replica_count (int): The number of worker replicas. If replica count = 1 then one chief replica will be provisioned. 
If replica_count > 1 the remainder will be @@ -1172,6 +1210,11 @@ def _prepare_and_validate_run( if model_display_name: utils.validate_display_name(model_display_name) managed_model.display_name = model_display_name + if model_labels: + utils.validate_labels(model_labels) + managed_model.labels = model_labels + else: + managed_model.labels = self._labels else: managed_model = None @@ -1313,6 +1356,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -1326,14 +1370,21 @@ def __init__( container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', model_serving_container_image_uri='gcr.io/my-trainer/serving:1', model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata) + model_serving_container_health_route='metadata', + labels={'key': 'value'}, + ) Usage with Dataset: ds = aiplatform.TabularDataset( 'projects/my-project/locations/us-central1/datasets/12345') - job.run(ds, replica_count=1, model_display_name='my-trained-model') + job.run( + ds, + replica_count=1, + model_display_name='my-trained-model', + model_labels={'key': 'value'}, + ) Usage without Dataset: @@ -1447,6 +1498,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. 
training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -1481,6 +1542,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, container_uri=container_uri, @@ -1515,6 +1577,7 @@ def run( ] = None, annotation_schema_uri: Optional[str] = None, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, @@ -1594,6 +1657,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. base_output_dir (str): GCS output directory of job. If not provided a timestamped directory in the staging directory will be used. 
@@ -1696,6 +1769,7 @@ def run( """ worker_pool_specs, managed_model = self._prepare_and_validate_run( model_display_name=model_display_name, + model_labels=model_labels, replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -1937,6 +2011,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -1949,14 +2024,21 @@ def __init__( command=['python3', 'run_script.py'] model_serving_container_image_uri='gcr.io/my-trainer/serving:1', model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata) + model_serving_container_health_route='metadata', + labels={'key': 'value'}, + ) Usage with Dataset: ds = aiplatform.TabularDataset( 'projects/my-project/locations/us-central1/datasets/12345') - job.run(ds, replica_count=1, model_display_name='my-trained-model') + job.run( + ds, + replica_count=1, + model_display_name='my-trained-model', + model_labels={'key': 'value'}, + ) Usage without Dataset: @@ -2070,6 +2152,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. 
The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -2104,6 +2196,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, container_uri=container_uri, @@ -2137,6 +2230,7 @@ def run( ] = None, annotation_schema_uri: Optional[str] = None, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, @@ -2209,6 +2303,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. base_output_dir (str): GCS output directory of job. If not provided a timestamped directory in the staging directory will be used. 
@@ -2316,6 +2420,7 @@ def run( """ worker_pool_specs, managed_model = self._prepare_and_validate_run( model_display_name=model_display_name, + model_labels=model_labels, replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -2532,6 +2637,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -2544,6 +2650,7 @@ def __init__( optimization_prediction_type="classification", optimization_objective="minimize-log-loss", column_specs={"column_1": "auto", "column_2": "numeric"}, + labels={'key': 'value'}, ) Args: @@ -2627,6 +2734,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -2661,6 +2778,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -2704,6 +2822,7 @@ def run( weight_column: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, export_evaluated_data_items: bool = False, export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, @@ -2774,6 +2893,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping (bool): Required. If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means @@ -2812,6 +2941,10 @@ def run( Raises: RuntimeError: If Training job has already been run or is waiting to run. 
""" + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) if self._is_waiting_to_run(): raise RuntimeError("AutoML Tabular Training is already scheduled to run.") @@ -2829,6 +2962,7 @@ def run( weight_column=weight_column, budget_milli_node_hours=budget_milli_node_hours, model_display_name=model_display_name, + model_labels=model_labels, disable_early_stopping=disable_early_stopping, export_evaluated_data_items=export_evaluated_data_items, export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, @@ -2848,6 +2982,7 @@ def _run( weight_column: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, export_evaluated_data_items: bool = False, export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, @@ -2918,6 +3053,16 @@ def _run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping (bool): Required. If true, the entire budget is used. This disables the early stopping feature. 
By default, the early stopping feature is enabled, which means @@ -3008,11 +3153,9 @@ def _run( "additionalExperiments" ] = self._additional_experiments - if model_display_name is None: - model_display_name = self._display_name - model = gca_model.Model( - display_name=model_display_name, + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, encryption_spec=self._model_encryption_spec, ) @@ -3088,6 +3231,7 @@ class AutoMLForecastingTrainingJob(_TrainingJob): def __init__( self, display_name: str, + labels: Optional[Dict[str, str]] = None, optimization_objective: Optional[str] = None, column_transformations: Optional[Union[Dict, List[Dict]]] = None, project: Optional[str] = None, @@ -3099,6 +3243,16 @@ def __init__( Args: display_name (str): Required. The user-defined name of this TrainingPipeline. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. optimization_objective (str): Optional. Objective function the model is to be optimized towards. The training process creates a Model that optimizes the value of the objective @@ -3130,6 +3284,7 @@ def __init__( """ super().__init__( display_name=display_name, + labels=labels, project=project, location=location, credentials=credentials, @@ -3160,6 +3315,7 @@ def run( validation_options: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -3279,6 +3435,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. 
+ model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -3291,6 +3457,11 @@ def run( RuntimeError if Training job has already been run or is waiting to run. """ + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError( "AutoML Forecasting Training is already scheduled to run." @@ -3320,6 +3491,7 @@ def run( quantiles=quantiles, validation_options=validation_options, model_display_name=model_display_name, + model_labels=model_labels, sync=sync, ) @@ -3346,6 +3518,7 @@ def _run( validation_options: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -3464,6 +3637,16 @@ def _run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. 
If False, this method will be executed in concurrent Future and any downstream object will @@ -3515,10 +3698,10 @@ def _run( "additionalExperiments" ] = self._additional_experiments - if model_display_name is None: - model_display_name = self._display_name - - model = gca_model.Model(display_name=model_display_name) + model = gca_model.Model( + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, + ) return self._run_job( training_task_definition=training_task_definition, @@ -3564,6 +3747,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -3629,6 +3813,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -3689,6 +3883,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -3706,6 +3901,7 @@ def run( test_fraction_split: float = 0.1, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, sync: bool = True, ) -> models.Model: @@ -3752,6 +3948,16 @@ def run( Optional. The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping: bool = False Required. If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means @@ -3770,6 +3976,11 @@ def run( RuntimeError: If Training job has already been run or is waiting to run. 
""" + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError("AutoML Image Training is already scheduled to run.") @@ -3784,6 +3995,7 @@ def run( test_fraction_split=test_fraction_split, budget_milli_node_hours=budget_milli_node_hours, model_display_name=model_display_name, + model_labels=model_labels, disable_early_stopping=disable_early_stopping, sync=sync, ) @@ -3798,6 +4010,7 @@ def _run( test_fraction_split: float = 0.1, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, sync: bool = True, ) -> models.Model: @@ -3852,6 +4065,16 @@ def _run( characters. If a `base_model` was provided, the display_name in the base_model will be overritten with this value. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping (bool): Required. If true, the entire budget is used. This disables the early stopping feature. 
By default, the early stopping feature is enabled, which means @@ -3888,6 +4111,7 @@ def _run( model_tbt = gca_model.Model(encryption_spec=self._model_encryption_spec) model_tbt.display_name = model_display_name or self._display_name + model_tbt.labels = model_labels or self._labels if base_model: # Use provided base_model to pass to model_to_upload causing the @@ -3945,6 +4169,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -3958,7 +4183,8 @@ def __init__( container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', model_serving_container_image_uri='gcr.io/my-trainer/serving:1', model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata + model_serving_container_health_route='metadata', + labels={'key': 'value'}, ) Usage with Dataset: @@ -3970,14 +4196,16 @@ def __init__( job.run( ds, replica_count=1, - model_display_name='my-trained-model' + model_display_name='my-trained-model', + model_labels={'key': 'value'}, ) Usage without Dataset: job.run( replica_count=1, - model_display_name='my-trained-model' + model_display_name='my-trained-model', + model_labels={'key': 'value'}, ) To ensure your model gets saved in Vertex AI, write your saved model to @@ -4086,6 +4314,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. 
International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -4120,6 +4358,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, container_uri=container_uri, @@ -4152,6 +4391,7 @@ def run( ] = None, annotation_schema_uri: Optional[str] = None, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, @@ -4224,6 +4464,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. base_output_dir (str): GCS output directory of job. If not provided a timestamped directory in the staging directory will be used. 
@@ -4326,6 +4576,7 @@ def run( """ worker_pool_specs, managed_model = self._prepare_and_validate_run( model_display_name=model_display_name, + model_labels=model_labels, replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -4530,6 +4781,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -4579,6 +4831,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -4628,6 +4890,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -4641,6 +4904,7 @@ def run( training_fraction_split: float = 0.8, test_fraction_split: float = 0.2, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the AutoML Image training job and returns a model. @@ -4669,6 +4933,16 @@ def run( Optional. The display name of the managed Vertex AI Model. 
The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync: bool = True Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -4681,6 +4955,11 @@ def run( RuntimeError: If Training job has already been run or is waiting to run. """ + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError("AutoML Video Training is already scheduled to run.") @@ -4692,6 +4971,7 @@ def run( training_fraction_split=training_fraction_split, test_fraction_split=test_fraction_split, model_display_name=model_display_name, + model_labels=model_labels, sync=sync, ) @@ -4702,6 +4982,7 @@ def _run( training_fraction_split: float = 0.8, test_fraction_split: float = 0.2, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -4732,6 +5013,16 @@ def _run( characters. If a `base_model` was provided, the display_name in the base_model will be overritten with this value. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. 
+ Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -4754,6 +5045,7 @@ def _run( # gca Model to be trained model_tbt = gca_model.Model(encryption_spec=self._model_encryption_spec) model_tbt.display_name = model_display_name or self._display_name + model_tbt.labels = model_labels or self._labels return self._run_job( training_task_definition=training_task_definition, @@ -4790,6 +5082,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -4833,6 +5126,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -4864,6 +5167,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -4908,6 +5212,7 @@ def run( validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -4941,6 +5246,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -4952,6 +5267,11 @@ def run( RuntimeError: If Training job has already been run or is waiting to run. 
""" + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError("AutoML Text Training is already scheduled to run.") @@ -4964,6 +5284,7 @@ def run( validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, model_display_name=model_display_name, + model_labels=model_labels, sync=sync, ) @@ -4975,6 +5296,7 @@ def _run( validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -5010,6 +5332,16 @@ def _run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -5020,11 +5352,9 @@ def _run( produce a Vertex AI Model. 
""" - if model_display_name is None: - model_display_name = self._display_name - model = gca_model.Model( - display_name=model_display_name, + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, encryption_spec=self._model_encryption_spec, ) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index bf57c86908..d239b9b441 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -22,7 +22,7 @@ from collections import namedtuple import logging import re -from typing import Any, Match, Optional, Type, TypeVar, Tuple +from typing import Any, Dict, Match, Optional, Type, TypeVar, Tuple from google.api_core import client_options from google.api_core import gapic_v1 @@ -239,6 +239,22 @@ def validate_display_name(display_name: str): raise ValueError("Display name needs to be less than 128 characters.") +def validate_labels(labels: Dict[str, str]): + """Validate labels. + + Args: + labels: labels to verify + Raises: + ValueError: if labels is not a mapping of string key value pairs. + """ + for k, v in labels.items(): + if not isinstance(k, str) or not isinstance(v, str): + raise ValueError( + "Expect labels to be a mapping of string key value pairs. " + 'Got "{}".'.format(labels) + ) + + def validate_region(region: str) -> bool: """Validates region against supported regions. 
diff --git a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py index d7b2e85001..d699563327 100644 --- a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py @@ -103,6 +103,8 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} _TEST_TRAINING_FRACTION_SPLIT = 0.8 _TEST_VALIDATION_FRACTION_SPLIT = 0.1 _TEST_TEST_FRACTION_SPLIT = 0.1 @@ -228,6 +230,7 @@ def test_run_call_pipeline_service_create( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + labels=_TEST_LABELS, ) model_from_job = job.run( @@ -241,6 +244,7 @@ def test_run_call_pipeline_service_create( data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT, data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, weight_column=_TEST_TRAINING_WEIGHT_COLUMN, time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS, @@ -263,7 +267,9 @@ def test_run_call_pipeline_service_create( test_fraction=_TEST_TEST_FRACTION_SPLIT, ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, @@ -275,6 +281,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_forecasting, 
training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, @@ -300,7 +307,7 @@ def test_run_call_pipeline_service_create( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_time_series, @@ -313,6 +320,7 @@ def test_run_call_pipeline_if_no_model_display_name( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + labels=_TEST_LABELS, ) model_from_job = job.run( @@ -347,7 +355,9 @@ def test_run_call_pipeline_if_no_model_display_name( ) # Test that if defaults to the job display name - true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, @@ -356,6 +366,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_forecasting, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_image_training_jobs.py b/tests/unit/aiplatform/test_automl_image_training_jobs.py index 29ce61a8a1..a46f960b1c 100644 --- a/tests/unit/aiplatform/test_automl_image_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_image_training_jobs.py @@ -46,6 +46,9 @@ _TEST_MODEL_DISPLAY_NAME = "model-display-name" _TEST_MODEL_ID = "98777645321" +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_TRAINING_TASK_INPUTS = 
json_format.ParseDict( { "modelType": "CLOUD", @@ -251,12 +254,15 @@ def test_run_call_pipeline_service_create( ) job = training_jobs.AutoMLImageTrainingJob( - display_name=_TEST_DISPLAY_NAME, base_model=mock_model_image + display_name=_TEST_DISPLAY_NAME, + base_model=mock_model_image, + labels=_TEST_LABELS, ) model_from_job = job.run( dataset=mock_dataset_image, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -276,6 +282,7 @@ def test_run_call_pipeline_service_create( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=mock_model_image._gca_resource.labels, description=mock_model_image._gca_resource.description, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -286,6 +293,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_image_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL, model_to_upload=true_managed_model, @@ -307,7 +315,7 @@ def test_run_call_pipeline_service_create( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_image, @@ -318,6 +326,7 @@ def test_run_call_pipeline_if_no_model_display_name( job = training_jobs.AutoMLImageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME, model_encryption_spec_key_name=_TEST_MODEL_ENCRYPTION_KEY_NAME, ) @@ -342,7 +351,9 @@ def test_run_call_pipeline_if_no_model_display_name( 
# Test that if defaults to the job display name true_managed_model = gca_model.Model( - display_name=_TEST_DISPLAY_NAME, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC + display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, + encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) true_input_data_config = gca_training_pipeline.InputDataConfig( @@ -351,6 +362,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_image_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py index 02ddad688b..2c380206e4 100644 --- a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py @@ -136,6 +136,10 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" + +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_TRAINING_FRACTION_SPLIT = 0.6 _TEST_VALIDATION_FRACTION_SPLIT = 0.2 _TEST_TEST_FRACTION_SPLIT = 0.2 @@ -308,6 +312,7 @@ def test_run_call_pipeline_service_create( job = training_jobs.AutoMLTabularTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -319,6 +324,7 @@ def test_run_call_pipeline_service_create( dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, 
validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -344,6 +350,7 @@ def test_run_call_pipeline_service_create( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -357,6 +364,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_tabular, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, @@ -476,7 +484,7 @@ def test_run_call_pipeline_service_create_with_export_eval_data_items( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_tabular, @@ -487,6 +495,7 @@ def test_run_call_pipeline_if_no_model_display_name( job = training_jobs.AutoMLTabularTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -522,7 +531,9 @@ def test_run_call_pipeline_if_no_model_display_name( # Test that if defaults to the job display name true_managed_model = gca_model.Model( - display_name=_TEST_DISPLAY_NAME, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC + display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, + encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) true_input_data_config = gca_training_pipeline.InputDataConfig( @@ -531,6 +542,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( 
display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_tabular, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_text_training_jobs.py b/tests/unit/aiplatform/test_automl_text_training_jobs.py index 4d7cd60527..583789c00e 100644 --- a/tests/unit/aiplatform/test_automl_text_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_text_training_jobs.py @@ -42,6 +42,10 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" + +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_MODEL_ID = "98777645321" _TEST_TRAINING_TASK_INPUTS_CLASSIFICATION = training_job_inputs.AutoMlTextClassificationInputs( @@ -319,6 +323,7 @@ def test_run_call_pipeline_service_create_classification( job = training_jobs.AutoMLTextTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION, multi_label=_TEST_CLASSIFICATION_MULTILABEL, training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME, @@ -328,6 +333,7 @@ def test_run_call_pipeline_service_create_classification( model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -345,6 +351,7 @@ def test_run_call_pipeline_service_create_classification( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) @@ -354,6 +361,7 @@ def test_run_call_pipeline_service_create_classification( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, 
training_task_definition=schema.training_job.definition.automl_text_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION, model_to_upload=true_managed_model, @@ -388,12 +396,14 @@ def test_run_call_pipeline_service_create_extraction( job = training_jobs.AutoMLTextTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_EXTRACTION, ) model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -409,7 +419,9 @@ def test_run_call_pipeline_service_create_extraction( test_fraction=_TEST_FRACTION_SPLIT_TEST, ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, @@ -417,6 +429,7 @@ def test_run_call_pipeline_service_create_extraction( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_text_extraction, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_EXTRACTION, model_to_upload=true_managed_model, @@ -450,6 +463,7 @@ def test_run_call_pipeline_service_create_sentiment( job = training_jobs.AutoMLTextTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_SENTIMENT, sentiment_max=10, ) @@ -457,6 +471,7 @@ def test_run_call_pipeline_service_create_sentiment( model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, 
training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -472,7 +487,9 @@ def test_run_call_pipeline_service_create_sentiment( test_fraction=_TEST_FRACTION_SPLIT_TEST, ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, @@ -480,6 +497,7 @@ def test_run_call_pipeline_service_create_sentiment( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_text_sentiment, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_SENTIMENT, model_to_upload=true_managed_model, @@ -500,7 +518,7 @@ def test_run_call_pipeline_service_create_sentiment( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_text, @@ -514,6 +532,7 @@ def test_run_call_pipeline_if_no_model_display_name( display_name=_TEST_DISPLAY_NAME, prediction_type="classification", multi_label=True, + labels=_TEST_LABELS, ) model_from_job = job.run( @@ -535,7 +554,9 @@ def test_run_call_pipeline_if_no_model_display_name( ) # Test that if defaults to the job display name - true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, @@ -543,6 +564,7 @@ def 
test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_text_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_video_training_jobs.py b/tests/unit/aiplatform/test_automl_video_training_jobs.py index b3087d0eed..fc7d6f38e3 100644 --- a/tests/unit/aiplatform/test_automl_video_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_video_training_jobs.py @@ -43,6 +43,10 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" + +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_MODEL_ID = "98777645321" # TODO _TEST_TRAINING_TASK_INPUTS = json_format.ParseDict( @@ -290,6 +294,7 @@ def test_run_call_pipeline_service_create( job = training_jobs.AutoMLVideoTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_VCN, model_type=_TEST_MODEL_TYPE_CLOUD, training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME, @@ -299,6 +304,7 @@ def test_run_call_pipeline_service_create( model_from_job = job.run( dataset=mock_dataset_video, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, sync=sync, @@ -314,6 +320,7 @@ def test_run_call_pipeline_service_create( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=mock_model._gca_resource.description, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) @@ -324,6 +331,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + 
labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_video_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, @@ -345,7 +353,7 @@ def test_run_call_pipeline_service_create( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_video, @@ -356,6 +364,7 @@ def test_run_call_pipeline_if_no_model_display_name( job = training_jobs.AutoMLVideoTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_VCN, model_type=_TEST_MODEL_TYPE_CLOUD, ) @@ -375,7 +384,9 @@ def test_run_call_pipeline_if_no_model_display_name( ) # Test that if defaults to the job display name - true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_video.name, @@ -383,6 +394,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_video_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py index 363ad18048..da4fc1fbe7 100644 --- a/tests/unit/aiplatform/test_custom_job.py +++ b/tests/unit/aiplatform/test_custom_job.py @@ -87,6 +87,8 @@ _TEST_TIMEOUT = 8000 _TEST_RESTART_JOB_ON_WORKER_RESTART = True +_TEST_LABELS = {"my_key": "my_value"} + _TEST_BASE_CUSTOM_JOB_PROTO = gca_custom_job_compat.CustomJob( 
display_name=_TEST_DISPLAY_NAME, job_spec=gca_custom_job_compat.CustomJobSpec( @@ -101,6 +103,7 @@ service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK, ), + labels=_TEST_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -228,6 +231,7 @@ def test_create_custom_job(self, create_custom_job_mock, get_custom_job_mock, sy display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) job.run( @@ -271,6 +275,7 @@ def test_run_custom_job_with_fail_raises( display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) with pytest.raises(RuntimeError) as e: @@ -395,6 +400,7 @@ def test_create_from_local_script( script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) job.run(sync=sync) @@ -441,6 +447,7 @@ def test_create_custom_job_with_tensorboard( display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) job.run( diff --git a/tests/unit/aiplatform/test_datasets.py b/tests/unit/aiplatform/test_datasets.py index 3457ccc7bd..7864ca0d35 100644 --- a/tests/unit/aiplatform/test_datasets.py +++ b/tests/unit/aiplatform/test_datasets.py @@ -144,6 +144,8 @@ _TEST_LIST_FILTER = 'display_name="abc"' _TEST_LIST_ORDER_BY = "create_time desc" +_TEST_LABELS = {"my_key": "my_value"} + @pytest.fixture def get_dataset_mock(): @@ -946,6 +948,34 @@ def test_create_then_import( expected_dataset.name = _TEST_NAME assert my_dataset._gca_resource == expected_dataset + @pytest.mark.usefixtures("get_dataset_image_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + aiplatform.init( + project=_TEST_PROJECT, encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, + ) + 
+ my_dataset = datasets.ImageDataset.create( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_IMAGE, + metadata=_TEST_NONTABULAR_DATASET_METADATA, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) + class TestTabularDataset: def setup_method(self): @@ -1165,6 +1195,35 @@ def test_tabular_dataset_column_name_bigquery(self): ] ) + @pytest.mark.usefixtures("get_dataset_tabular_bq_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + + my_dataset = datasets.TabularDataset.create( + display_name=_TEST_DISPLAY_NAME, + bq_source=_TEST_SOURCE_URI_BQ, + labels=_TEST_LABELS, + encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, + sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_TABULAR, + metadata=_TEST_METADATA_TABULAR_BQ, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) + class TestTextDataset: def setup_method(self): @@ -1364,6 +1423,34 @@ def test_create_then_import( expected_dataset.name = _TEST_NAME assert my_dataset._gca_resource == expected_dataset + @pytest.mark.usefixtures("get_dataset_text_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + aiplatform.init( + project=_TEST_PROJECT, encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, + ) + + my_dataset = datasets.TextDataset.create( + 
display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_TEXT, + metadata=_TEST_NONTABULAR_DATASET_METADATA, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) + class TestVideoDataset: def setup_method(self): @@ -1525,3 +1612,31 @@ def test_create_then_import( expected_dataset.name = _TEST_NAME assert my_dataset._gca_resource == expected_dataset + + @pytest.mark.usefixtures("get_dataset_video_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + aiplatform.init( + project=_TEST_PROJECT, encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME + ) + + my_dataset = datasets.VideoDataset.create( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_VIDEO, + metadata=_TEST_NONTABULAR_DATASET_METADATA, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py index 0ae76ea988..9dfc2db481 100644 --- a/tests/unit/aiplatform/test_endpoints.py +++ b/tests/unit/aiplatform/test_endpoints.py @@ -167,6 +167,8 @@ _TEST_LIST_ORDER_BY_CREATE_TIME = "create_time desc" _TEST_LIST_ORDER_BY_DISPLAY_NAME = "display_name" +_TEST_LABELS = {"my_key": "my_value"} + @pytest.fixture def get_endpoint_mock(): @@ -527,6 +529,22 @@ def test_create_with_description(self, 
create_endpoint_mock, sync): parent=_TEST_PARENT, endpoint=expected_endpoint, metadata=(), ) + @pytest.mark.usefixtures("get_endpoint_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_with_labels(self, create_endpoint_mock, sync): + my_endpoint = models.Endpoint.create( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync + ) + if not sync: + my_endpoint.wait() + + expected_endpoint = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) + create_endpoint_mock.assert_called_once_with( + parent=_TEST_PARENT, endpoint=expected_endpoint, metadata=(), + ) + @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock") @pytest.mark.parametrize("sync", [True, False]) def test_deploy(self, deploy_model_mock, sync): diff --git a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py index 752d39a93c..d82071db4f 100644 --- a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py +++ b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py @@ -78,6 +78,7 @@ _TEST_SEARCH_ALGORITHM = "random" _TEST_MEASUREMENT_SELECTION = "best" +_TEST_LABELS = {"my_hp_key": "my_hp_value"} _TEST_BASE_HYPERPARAMETER_TUNING_JOB_PROTO = gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob( display_name=_TEST_DISPLAY_NAME, @@ -123,6 +124,7 @@ max_trial_count=_TEST_MAX_TRIAL_COUNT, max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, trial_job_spec=test_custom_job._TEST_BASE_CUSTOM_JOB_PROTO.job_spec, + labels=_TEST_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -283,6 +285,7 @@ def test_create_hyperparameter_tuning_job( max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, search_algorithm=_TEST_SEARCH_ALGORITHM, measurement_selection=_TEST_MEASUREMENT_SELECTION, + labels=_TEST_LABELS, ) job.run( @@ -345,6 +348,7 @@ def test_run_hyperparameter_tuning_job_with_fail_raises( max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, 
search_algorithm=_TEST_SEARCH_ALGORITHM, measurement_selection=_TEST_MEASUREMENT_SELECTION, + labels=_TEST_LABELS, ) with pytest.raises(RuntimeError): @@ -524,6 +528,7 @@ def test_create_hyperparameter_tuning_job_with_tensorboard( max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, search_algorithm=_TEST_SEARCH_ALGORITHM, measurement_selection=_TEST_MEASUREMENT_SELECTION, + labels=_TEST_LABELS, ) job.run( diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 600b880d14..89a6dc9ca7 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -599,6 +599,42 @@ def test_upload_uploads_and_gets_model( get_model_mock.assert_called_once_with(name=_TEST_MODEL_RESOURCE_NAME) + @pytest.mark.parametrize("sync", [True, False]) + def test_upload_uploads_and_gets_model_with_labels( + self, upload_model_mock, get_model_mock, sync + ): + + my_model = models.Model.upload( + display_name=_TEST_MODEL_NAME, + serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, + serving_container_predict_route=_TEST_SERVING_CONTAINER_PREDICTION_ROUTE, + serving_container_health_route=_TEST_SERVING_CONTAINER_HEALTH_ROUTE, + labels=_TEST_LABEL, + sync=sync, + ) + + if not sync: + my_model.wait() + + container_spec = gca_model.ModelContainerSpec( + image_uri=_TEST_SERVING_CONTAINER_IMAGE, + predict_route=_TEST_SERVING_CONTAINER_PREDICTION_ROUTE, + health_route=_TEST_SERVING_CONTAINER_HEALTH_ROUTE, + ) + + managed_model = gca_model.Model( + display_name=_TEST_MODEL_NAME, + container_spec=container_spec, + labels=_TEST_LABEL, + ) + + upload_model_mock.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + model=managed_model, + ) + + get_model_mock.assert_called_once_with(name=_TEST_MODEL_RESOURCE_NAME) + def test_upload_raises_with_impartial_explanation_spec(self): with pytest.raises(ValueError) as e: @@ -633,6 +669,7 @@ def test_upload_uploads_and_gets_model_with_all_args( 
serving_container_ports=_TEST_SERVING_CONTAINER_PORTS, explanation_metadata=_TEST_EXPLANATION_METADATA, explanation_parameters=_TEST_EXPLANATION_PARAMETERS, + labels=_TEST_LABEL, sync=sync, ) @@ -673,6 +710,7 @@ def test_upload_uploads_and_gets_model_with_all_args( metadata=_TEST_EXPLANATION_METADATA, parameters=_TEST_EXPLANATION_PARAMETERS, ), + labels=_TEST_LABEL, ) upload_model_with_explanations_mock.assert_called_once_with( diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index c639c462cb..15824d3172 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -96,6 +96,9 @@ _TEST_INVALID_ACCELERATOR_TYPE = "NVIDIA_DOES_NOT_EXIST" _TEST_ACCELERATOR_COUNT = 1 _TEST_MODEL_DISPLAY_NAME = "model-display-name" +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_DEFAULT_TRAINING_FRACTION_SPLIT = 0.8 _TEST_DEFAULT_VALIDATION_FRACTION_SPLIT = 0.1 _TEST_DEFAULT_TEST_FRACTION_SPLIT = 0.1 @@ -630,6 +633,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( job = training_jobs.CustomTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, script_path=_TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -656,6 +660,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -723,6 +728,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, 
description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -761,6 +767,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( ), model_to_upload=true_managed_model, input_data_config=true_input_data_config, + labels=_TEST_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -1588,7 +1595,7 @@ def test_get_and_return_subclass_custom(self): assert isinstance(subcls, aiplatform.training_jobs.CustomTrainingJob) @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_service_create_with_nontabular_dataset( + def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_pipeline_service_get, @@ -1605,6 +1612,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( job = training_jobs.CustomTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, script_path=_TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -1628,7 +1636,6 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, - model_display_name=_TEST_MODEL_DISPLAY_NAME, sync=sync, ) @@ -1685,7 +1692,8 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( ) true_managed_model = gca_model.Model( - display_name=_TEST_MODEL_DISPLAY_NAME, + display_name=_TEST_DISPLAY_NAME + "-model", + labels=_TEST_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -1706,6 +1714,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, 
training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ -1846,6 +1855,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( job = training_jobs.CustomContainerTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, command=_TEST_TRAINING_CONTAINER_CMD, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -1870,6 +1880,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -1931,6 +1942,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -1954,6 +1966,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ -2645,6 +2658,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( job = training_jobs.CustomContainerTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, command=_TEST_TRAINING_CONTAINER_CMD, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -2671,6 +2685,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( accelerator_type=_TEST_ACCELERATOR_TYPE, 
accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, sync=sync, ) @@ -2721,6 +2736,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -2755,6 +2771,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( ), model_to_upload=true_managed_model, input_data_config=true_input_data_config, + labels=_TEST_LABELS, ) mock_pipeline_service_create.assert_called_once_with( @@ -3080,6 +3097,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( job = training_jobs.CustomPythonPackageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, python_package_gcs_uri=_TEST_OUTPUT_PYTHON_PACKAGE_PATH, python_module_name=_TEST_PYTHON_MODULE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, @@ -3099,6 +3117,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( model_from_job = job.run( dataset=mock_tabular_dataset, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, base_output_dir=_TEST_BASE_OUTPUT_DIR, service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK, @@ -3167,6 +3186,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -3190,6 +3210,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ 
-3227,7 +3248,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_display_name( + def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_pipeline_service_get, @@ -3243,6 +3264,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis job = training_jobs.CustomPythonPackageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, python_package_gcs_uri=_TEST_OUTPUT_PYTHON_PACKAGE_PATH, python_module_name=_TEST_PYTHON_MODULE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, @@ -3322,6 +3344,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis true_managed_model = gca_model.Model( display_name=_TEST_DISPLAY_NAME + "-model", + labels=_TEST_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -3345,6 +3368,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ -4032,7 +4056,7 @@ def test_run_call_pipeline_service_create_distributed_training( assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_service_create_with_nontabular_dataset( + def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_pipeline_service_get, @@ -4047,6 +4071,7 @@ def 
test_run_call_pipeline_service_create_with_nontabular_dataset( job = training_jobs.CustomPythonPackageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, python_package_gcs_uri=_TEST_OUTPUT_PYTHON_PACKAGE_PATH, python_module_name=_TEST_PYTHON_MODULE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, @@ -4071,7 +4096,6 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, - model_display_name=_TEST_MODEL_DISPLAY_NAME, service_account=_TEST_SERVICE_ACCOUNT, tensorboard=_TEST_TENSORBOARD_RESOURCE_NAME, sync=sync, @@ -4124,7 +4148,8 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( ) true_managed_model = gca_model.Model( - display_name=_TEST_MODEL_DISPLAY_NAME, + display_name=_TEST_DISPLAY_NAME + "-model", + labels=_TEST_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -4145,6 +4170,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { diff --git a/tests/unit/aiplatform/test_utils.py b/tests/unit/aiplatform/test_utils.py index 068575fc51..ed85fb9f0a 100644 --- a/tests/unit/aiplatform/test_utils.py +++ b/tests/unit/aiplatform/test_utils.py @@ -257,6 +257,20 @@ def test_validate_display_name(): aiplatform.utils.validate_display_name("my_model_abc") +def test_validate_labels_raises_value_not_str(): + with pytest.raises(ValueError): + aiplatform.utils.validate_labels({"my_key1": 1, "my_key2": 2}) + + +def test_validate_labels_raises_key_not_str(): + with pytest.raises(ValueError): + aiplatform.utils.validate_labels({1: "my_value1", 2: "my_value2"}) + + +def 
test_validate_labels(): + aiplatform.utils.validate_labels({"my_key1": "my_value1", "my_key2": "my_value2"}) + + @pytest.mark.parametrize( "accelerator_type, expected", [