From bfb4f6350d47d3c4bc048a9bcbe491bf6b948cb3 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 12 Oct 2021 02:55:46 -0700 Subject: [PATCH 01/36] feat: Support uploading local models --- google/cloud/aiplatform/models.py | 14 +++++ google/cloud/aiplatform/utils/__init__.py | 65 ++++++++++++++++++++++- 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 2ce48adc53..3246385259 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -1492,6 +1492,7 @@ def upload( credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, + staging_bucket: Optional[str] = None, sync=True, ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model @@ -1635,6 +1636,9 @@ def upload( If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. + staging_bucket (str): + Optional. Bucket to stage local model artifacts. Overrides + staging_bucket set in aiplatform.init. Returns: model: Instantiated representation of the uploaded model resource. Raises: @@ -1697,6 +1701,16 @@ def upload( encryption_spec=encryption_spec, ) + if artifact_uri and not artifact_uri.startswith("gs://"): + staged_data_uri = utils.stage_local_data_in_gcs( + data_path=artifact_uri, + staging_gcs_dir=staging_bucket, + project=project, + location=location, + credentials=credentials, + ) + artifact_uri = staged_data_uri + if artifact_uri: managed_model.artifact_uri = artifact_uri diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index d239b9b441..3daa74f1fd 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -581,7 +581,7 @@ def _timestamped_copy_to_gcs( blob_path = "-".join(["aiplatform", timestamp, local_file_name]) if gcs_blob_prefix: - blob_path = "/".join([gcs_blob_prefix, blob_path]) + blob_path = "/".join([gcs_blob_prefix, blob_path, local_file_name]) # TODO(b/171202993) add user agent client = storage.Client(project=project, credentials=credentials) @@ -591,3 +591,66 @@ def _timestamped_copy_to_gcs( gcs_path = "".join(["gs://", "/".join([blob.bucket.name, blob.name])]) return gcs_path + + +def stage_local_data_in_gcs( + data_path: str, + staging_gcs_dir: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, +) -> str: + """Stages a local data in GCS. + + The file copied to GCS is the name of the local file prepended with an + "aiplatform-{timestamp}-" string. + + Args: + data_path: Required. Path of the local data to copy to GCS. + staging_gcs_dir: + Optional. Google Cloud Storage bucket to be used for data staging. + project: Optional. Google Cloud Project that contains the staging bucket. + location: Optional. Google Cloud location to use for the staging bucket. + credentials: The custom credentials to use when making API calls. + If not provided, default credentials will be used. + + Returns: + Google Cloud Storage URI of the staged data. 
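A minimal sketch of the end-user flow this patch is building toward, assuming an existing project and bucket (the project, bucket, container image and local path below are illustrative, and staging of whole local directories only arrives later in this series)::

    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")

    # artifact_uri may now be a local path; it is staged to staging_bucket
    # (or the bucket from aiplatform.init) before the Model is created.
    # The local directory is assumed to contain e.g. model.joblib.
    model = aiplatform.Model.upload(
        display_name="my-model",
        serving_container_image_uri=(
            "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest"
        ),
        artifact_uri="/tmp/trained_model/",
        staging_bucket="gs://my-bucket/staging",
    )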
+ """ + data_path_obj = pathlib.Path(data_path) + + if not data_path_obj.exists(): + raise RuntimeError(f"Local data does not exist: data_path='{data_path}'") + + staging_gcs_dir = staging_gcs_dir or initializer.global_config.staging_bucket + if not staging_gcs_dir: + project = project or initializer.global_config.project + location = location or initializer.global_config.location + staging_gcs_dir = "gs://" + project + "-staging" + # Creating the bucket if it does not exist. + # Currently we only do this when staging_gcs_dir is not specified. + staging_bucket_name, _ = extract_bucket_and_prefix_from_gcs_path(staging_gcs_dir) + client = storage.Client(project=project, credentials=credentials) + staging_bucket = storage.Bucket(client=client, name=staging_bucket_name) + if not staging_bucket.exists(): + staging_bucket = client.create_bucket( + bucket_or_name=staging_bucket, + project =project , + location=location, + ) + + staging_root_dir = staging_gcs_dir.rstrip("/") + "/vertex_ai_auto_staging/" + + if data_path_obj.is_dir(): + raise NotImplementedError("Uploading directories is not supported yet.") + + _timestamped_gcs_dir + + staged_data_uri = _timestamped_copy_to_gcs( + local_file_path=data_path, + gcs_dir=staging_root_dir, + project=project or initializer.global_config.project, + credentials=credentials or initializer.global_config.credentials, + ) + + return staged_data_uri From a62c264d823f1fbdde7734e4c63e3eab1f04ffb9 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 14 Oct 2021 23:23:12 -0700 Subject: [PATCH 02/36] feat: Added the upload_to_gcs utility function It can properly upload both files and directories. --- google/cloud/aiplatform/utils/__init__.py | 55 +++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index 3daa74f1fd..401c4e1ecf 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -18,6 +18,7 @@ import abc import datetime +import glob import pathlib from collections import namedtuple import logging @@ -56,6 +57,8 @@ accelerator_type as gca_accelerator_type, ) +_logger = logging.getLogger(__name__) + VertexAiServiceClient = TypeVar( "VertexAiServiceClient", # v1beta1 @@ -593,6 +596,58 @@ def _timestamped_copy_to_gcs( return gcs_path +def upload_to_gcs( + source_path: str, + destination_uri: str, + project: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, +): + """Uploads local files to GCS. + + After upload the `destination_uri` will contain the same data as the `source_path`. + + Args: + source_path: Required. Path of the local data to copy to GCS. + destination_uri: Required. GCS URI where the data should be uploaded. + project: Optional. Google Cloud Project that contains the staging bucket. + credentials: The custom credentials to use when making API calls. + If not provided, default credentials will be used. 
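A short sketch of calling the new utility, assuming default credentials (the local paths and bucket are illustrative; a later patch in this series moves the function to ``utils.gcs_utils``)::

    from google.cloud.aiplatform import utils

    # Single file: destination_uri is the exact blob to write.
    utils.upload_to_gcs(
        source_path="/tmp/model/model.joblib",
        destination_uri="gs://my-bucket/models/model.joblib",
    )

    # Directory: every file under source_path is mirrored below destination_uri.
    utils.upload_to_gcs(
        source_path="/tmp/model",
        destination_uri="gs://my-bucket/models/my_model",
    )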
+ """ + source_path_obj = pathlib.Path(source_path) + if not source_path_obj.exists(): + raise RuntimeError(f"Source path does not exist: {source_path}") + + storage_client = storage.Client(project=project, credentials=credentials) + if source_path_obj.is_dir(): + source_file_paths = glob.glob( + pathname=str(source_path_obj / "**"), recursive=True + ) + for source_file_path in source_file_paths: + source_file_path_obj = pathlib.Path(source_file_path) + if source_file_path_obj.is_dir(): + continue + source_file_relative_path_obj = source_file_path_obj.relative_to( + source_path_obj + ) + source_file_relative_posix_path = source_file_relative_path_obj.as_posix() + destination_file_uri = ( + destination_uri.rstrip("/") + "/" + source_file_relative_posix_path + ) + _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') + destination_blob = storage.Blob.from_string( + destination_file_uri, client=storage_client + ) + destination_blob.upload_from_filename(filename=source_file_path) + else: + source_file_path = source_path + destination_file_uri = destination_uri + _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') + destination_blob = storage.Blob.from_string( + destination_file_uri, client=storage_client + ) + destination_blob.upload_from_filename(filename=source_file_path) + + def stage_local_data_in_gcs( data_path: str, staging_gcs_dir: Optional[str] = None, From cf97d3bb9f1f42429c46fa510d5efa360e323e92 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 14 Oct 2021 23:31:34 -0700 Subject: [PATCH 03/36] feat: Support staging directories in stage_local_data_in_gcs Switched the stage_local_data_in_gcs function to use upload_to_gcs --- google/cloud/aiplatform/utils/__init__.py | 34 ++++++++++++----------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index 401c4e1ecf..36b2f50348 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -584,7 +584,7 @@ def _timestamped_copy_to_gcs( blob_path = "-".join(["aiplatform", timestamp, local_file_name]) if gcs_blob_prefix: - blob_path = "/".join([gcs_blob_prefix, blob_path, local_file_name]) + blob_path = "/".join([gcs_blob_prefix, blob_path]) # TODO(b/171202993) add user agent client = storage.Client(project=project, credentials=credentials) @@ -681,31 +681,33 @@ def stage_local_data_in_gcs( if not staging_gcs_dir: project = project or initializer.global_config.project location = location or initializer.global_config.location - staging_gcs_dir = "gs://" + project + "-staging" # Creating the bucket if it does not exist. # Currently we only do this when staging_gcs_dir is not specified. 
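A sketch of the staging behaviour after this change, assuming default credentials and a project named "my-project" (all names illustrative)::

    from google.cloud.aiplatform import utils

    # Without staging_gcs_dir, a "my-project-staging" bucket is created or
    # reused and the data lands under a timestamped vertex_ai_auto_staging/ prefix.
    staged_uri = utils.stage_local_data_in_gcs(data_path="/tmp/trained_model/")

    # An explicit staging location can also be supplied.
    staged_uri = utils.stage_local_data_in_gcs(
        data_path="/tmp/trained_model/",
        staging_gcs_dir="gs://my-bucket/staging",
    )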
- staging_bucket_name, _ = extract_bucket_and_prefix_from_gcs_path(staging_gcs_dir) + staging_bucket_name = project + "-staging" client = storage.Client(project=project, credentials=credentials) staging_bucket = storage.Bucket(client=client, name=staging_bucket_name) if not staging_bucket.exists(): + _logger.info(f'Creating staging GCS bucket "{staging_bucket_name}"') staging_bucket = client.create_bucket( - bucket_or_name=staging_bucket, - project =project , - location=location, + bucket_or_name=staging_bucket, project=project, location=location, ) + staging_gcs_dir = "gs://" + staging_bucket_name - staging_root_dir = staging_gcs_dir.rstrip("/") + "/vertex_ai_auto_staging/" - - if data_path_obj.is_dir(): - raise NotImplementedError("Uploading directories is not supported yet.") + timestamp = datetime.datetime.now().isoformat(sep="-", timespec="milliseconds") + staging_gcs_subdir = ( + staging_gcs_dir.rstrip("/") + "/vertex_ai_auto_staging/" + timestamp + ) - _timestamped_gcs_dir + staged_data_uri = staging_gcs_subdir + if data_path_obj.is_file(): + staged_data_uri = staging_gcs_subdir + "/" + data_path_obj.name - staged_data_uri = _timestamped_copy_to_gcs( - local_file_path=data_path, - gcs_dir=staging_root_dir, - project=project or initializer.global_config.project, - credentials=credentials or initializer.global_config.credentials, + _logger.info(f'Uploading "{data_path}" to "{staged_data_uri}"') + upload_to_gcs( + source_path=data_path, + destination_uri=staged_data_uri, + project=project, + credentials=credentials, ) return staged_data_uri From dd35ac4fde4443b48a570b91a816ab6cfd97adc5 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 15 Oct 2021 00:54:05 -0700 Subject: [PATCH 04/36] feat: Validate model directories before uploading --- google/cloud/aiplatform/models.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 3246385259..297c32af62 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import pathlib import proto from typing import Dict, List, NamedTuple, Optional, Sequence, Tuple, Union @@ -49,6 +50,15 @@ _LOGGER = base.Logger(__name__) +_SUPPORTED_MODEL_FILE_NAMES = [ + "model.pkl", + "model.joblib", + "model.bst", + "saved_model.pb", + "saved_model.pbtxt", +] + + class Prediction(NamedTuple): """Prediction class envelopes returned Model predictions and the Model id. @@ -1702,8 +1712,26 @@ def upload( ) if artifact_uri and not artifact_uri.startswith("gs://"): + # Validating the model directory + model_dir = pathlib.Path(artifact_uri) + if not model_dir.exists(): + raise ValueError(f"artifact_uri path does not exist: '{artifact_uri}'") + if not model_dir.is_dir(): + raise ValueError( + f"artifact_uri path must be a directory: '{artifact_uri}'" + ) + if not any( + (model_dir / file_name).exists() + for file_name in _SUPPORTED_MODEL_FILE_NAMES + ): + raise ValueError( + "artifact_uri directory does not contain any supported model files. 
" + f"The upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'" + ) + + # Uploading the model staged_data_uri = utils.stage_local_data_in_gcs( - data_path=artifact_uri, + data_path=str(model_dir), staging_gcs_dir=staging_bucket, project=project, location=location, From d554bbf884cbf75a752f515cf9a1f9f83d8f0817 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 15 Oct 2021 19:05:14 -0700 Subject: [PATCH 05/36] feat: Added framework-specific model uploaders for XGBoost, Scikit-learn and Tensorflow --- google/cloud/aiplatform/models.py | 275 ++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 297c32af62..b9fa446b37 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -16,6 +16,8 @@ # import pathlib import proto +import shutil +import tempfile from typing import Dict, List, NamedTuple, Optional, Sequence, Tuple, Union from google.api_core import operation @@ -2431,3 +2433,276 @@ def export_model( _LOGGER.log_action_completed_against_resource("model", "exported", self) return json_format.MessageToDict(operation_future.metadata.output_info._pb) + + @staticmethod + def upload_xgboost_model_file( + model_file_path: str, + xgboost_version: Optional[str] = None, + display_name: Optional[str] = None, + description: Optional[str] = None, + instance_schema_uri: Optional[str] = None, + parameters_schema_uri: Optional[str] = None, + prediction_schema_uri: Optional[str] = None, + explanation_metadata: Optional[explain.ExplanationMetadata] = None, + explanation_parameters: Optional[explain.ExplanationParameters] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, + encryption_spec_key_name: Optional[str] = None, + staging_bucket: Optional[str] = None, + sync=True, + ): + # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#xgboost + XGBOOST_SUPPORTED_VERSIONS = ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"] + XGBOOST_CONTAINER_IMAGE_URI_TEMPLATE = ( + "{registry}/vertex-ai/prediction/xgboost-{cpu_or_gpu}.{version}:latest" + ) + + XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS = [ + ".pkl", + ".joblib", + ".bst", + ] + + if xgboost_version is None: + # Using the latest version + xgboost_version = XGBOOST_SUPPORTED_VERSIONS[-1] + _LOGGER.info(f"Using the {xgboost_version} version of XGBoost.") + + if xgboost_version not in XGBOOST_SUPPORTED_VERSIONS: + _LOGGER.error( + f"XGBoost version {version} is not supported. " + f"Supported versions: {XGBOOST_SUPPORTED_VERSIONS}" + ) + + model_file_path_obj = pathlib.Path(model_file_path) + if not model_file_path_obj.is_file(): + raise ValueError( + f"model_file_path path must point to a file: '{model_file_path}'" + ) + + model_file_extension = model_file_path_obj.suffix + if model_file_extension not in XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS: + _LOGGER.warning( + f"Only the following XGBoost model file extensions are currently supported: '{XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS}'" + ) + _LOGGER.warning( + "Treating the model file as a binary serialized XGBoost Booster." 
+ ) + model_file_extension = ".bst" + + # Preparing model directory + prepared_model_dir = tempfile.mkdtemp() + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) + + container_image_uri = XGBOOST_CONTAINER_IMAGE_URI_TEMPLATE.format( + registry=_get_container_registry( + location or aiplatform.initializer.global_config.location + ), + cpu_or_gpu="cpu", + version=xgboost_version.replace(".", "-"), + ) + + display_name = display_name or "XGBoost model" + + return aiplatform.Model.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=sync, + ) + + @staticmethod + def upload_scikit_learn_model_file( + model_file_path: str, + sklearn_version: Optional[str] = None, + display_name: Optional[str] = None, + description: Optional[str] = None, + instance_schema_uri: Optional[str] = None, + parameters_schema_uri: Optional[str] = None, + prediction_schema_uri: Optional[str] = None, + explanation_metadata: Optional[explain.ExplanationMetadata] = None, + explanation_parameters: Optional[explain.ExplanationParameters] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, + encryption_spec_key_name: Optional[str] = None, + staging_bucket: Optional[str] = None, + sync=True, + ): + # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#scikit-learn + SKLEARN_SUPPORTED_VERSIONS = ["0.20", "0.22", "0.23", "0.24"] + SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE = ( + "{registry}/vertex-ai/prediction/sklearn-{cpu_or_gpu}.{version}:latest" + ) + SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS = [ + ".pkl", + ".joblib", + ] + + if sklearn_version is None: + # Using the latest version + sklearn_version = SKLEARN_SUPPORTED_VERSIONS[-1] + _LOGGER.info(f"Using the {sklearn_version} version of Scikit-learn.") + + if sklearn_version not in SKLEARN_SUPPORTED_VERSIONS: + _LOGGER.error( + f"Scikit-learn version {version} is not supported. " + f"Supported versions: {SKLEARN_SUPPORTED_VERSIONS}" + ) + + model_file_path_obj = pathlib.Path(model_file_path) + if not model_file_path_obj.is_file(): + raise ValueError( + f"model_file_path path must point to a file: '{model_file_path}'" + ) + + model_file_extension = model_file_path_obj.suffix + if model_file_extension not in SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS: + _LOGGER.warning( + f"Only the following Scikit-learn model file extensions are currently supported: '{SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS}'" + ) + _LOGGER.warning( + "Treating the model file as a pickle serialized Scikit-learn model." 
+ ) + model_file_extension = ".pkl" + + # Preparing model directory + prepared_model_dir = tempfile.mkdtemp() + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) + + container_image_uri = SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE.format( + registry=_get_container_registry( + location or aiplatform.initializer.global_config.location + ), + cpu_or_gpu="cpu", + version=sklearn_version.replace(".", "-"), + ) + + display_name = display_name or "Scikit-learn model" + + return aiplatform.Model.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=sync, + ) + + @staticmethod + def upload_tensorflow_saved_model( + saved_model_dir: str, + tensorflow_version: Optional[str] = None, + use_gpu: bool = False, + display_name: Optional[str] = None, + description: Optional[str] = None, + instance_schema_uri: Optional[str] = None, + parameters_schema_uri: Optional[str] = None, + prediction_schema_uri: Optional[str] = None, + explanation_metadata: Optional[explain.ExplanationMetadata] = None, + explanation_parameters: Optional[explain.ExplanationParameters] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, + encryption_spec_key_name: Optional[str] = None, + staging_bucket: Optional[str] = None, + sync=True, + ): + # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#tensorflow + TENSORFLOW_SUPPORTED_VERSIONS = [ + "0.15", + "2.1", + "2.2", + "2.3", + "2.4", + "2.5", + "2.6", + ] + TENSORFLOW_CONTAINER_IMAGE_URI_TEMPLATE = ( + "{registry}/vertex-ai/prediction/tf{tf2_or_1}-{cpu_or_gpu}.{version}:latest" + ) + + if tensorflow_version is None: + # Using the latest version + tensorflow_version = TENSORFLOW_SUPPORTED_VERSIONS[-1] + _LOGGER.info(f"Using the {tensorflow_version} version of Tensorflow.") + + if tensorflow_version not in TENSORFLOW_SUPPORTED_VERSIONS: + _LOGGER.error( + f"Tensorflow version {version} is not supported. 
" + f"Supported versions: {TENSORFLOW_SUPPORTED_VERSIONS}" + ) + + container_image_uri = TENSORFLOW_CONTAINER_IMAGE_URI_TEMPLATE.format( + registry=_get_container_registry( + location or aiplatform.initializer.global_config.location + ), + tf2_or_1=("2" if tensorflow_version.startswith("2.") else ""), + cpu_or_gpu="gpu" if use_gpu else "cpu", + version=tensorflow_version.replace(".", "-"), + ) + + display_name = display_name or "Tensorflow model" + + return aiplatform.Model.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=saved_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=sync, + ) + + +def _get_container_registry(location: Optional[str] = None,) -> str: + location = location or "us-" + if location.startswith("us-"): + return "us-docker.pkg.dev" + elif location.startswith("europe-"): + return "europe-docker.pkg.dev" + elif location.startswith("asia-"): + return "asia-docker.pkg.dev" + else: + raise ValueError(f"Unrecognized location: {location}") From 650b7673f9c8acd3a20d5623a1c5238c19974f5f Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 19 Oct 2021 13:52:30 -0700 Subject: [PATCH 06/36] Added docstrings --- google/cloud/aiplatform/models.py | 346 ++++++++++++++++++++++++++++++ 1 file changed, 346 insertions(+) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index b9fa446b37..8f05e2b04a 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2453,6 +2453,120 @@ def upload_xgboost_model_file( staging_bucket: Optional[str] = None, sync=True, ): + """Uploads a model and returns a Model representing the uploaded Model + resource. + + Note: This function is *experimental* and can be changed in the future. + + Example usage:: + + my_model = Model.upload_xgboost_model_file( + model_file_path="iris.xgboost_model.bst" + ) + + Args: + model_file_path (str): Required. Local file path of the model. + xgboost_version (str): Optional. The version of the XGBoost serving container. + Supported versions: ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"]. + If the version is not specified, the latest version is used. + display_name (str): + Optional. The display name of the Model. The name can be up to 128 + characters long and can be consist of any UTF-8 characters. + description (str): + The description of the model. + instance_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single instance, which + are used in + ``PredictRequest.instances``, + ``ExplainRequest.instances`` + and + ``BatchPredictionJob.input_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + parameters_schema_uri (str): + Optional. 
Points to a YAML file stored on Google Cloud + Storage describing the parameters of prediction and + explanation via + ``PredictRequest.parameters``, + ``ExplainRequest.parameters`` + and + ``BatchPredictionJob.model_parameters``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform, if no parameters are supported it is set to an + empty string. Note: The URI given on output will be + immutable and probably different, including the URI scheme, + than the one given on input. The output URI will point to a + location where the user only has a read access. + prediction_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single prediction + produced by this Model, which are returned via + ``PredictResponse.predictions``, + ``ExplainResponse.explanations``, + and + ``BatchPredictionJob.output_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + explanation_metadata (explain.ExplanationMetadata): + Optional. Metadata describing the Model's input and output for explanation. + Both `explanation_metadata` and `explanation_parameters` must be + passed together when used. For more details, see + `Ref docs ` + explanation_parameters (explain.ExplanationParameters): + Optional. Parameters to configure explaining for Model's predictions. + For more details, see `Ref docs ` + project: Optional[str]=None, + Project to upload this model to. Overrides project set in + aiplatform.init. + location: Optional[str]=None, + Location to upload this model to. Overrides location set in + aiplatform.init. + credentials: Optional[auth_credentials.Credentials]=None, + Custom credentials to use to upload this model. Overrides credentials + set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the model. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, this Model and all sub-resources of this Model will be secured by this key. + + Overrides encryption_spec_key_name set in aiplatform.init. + staging_bucket (str): + Optional. Bucket to stage local model artifacts. Overrides + staging_bucket set in aiplatform.init. + Returns: + model: Instantiated representation of the uploaded model resource. + Raises: + ValueError: If only `explanation_metadata` or `explanation_parameters` + is specified. 
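To complement the docstring above, a sketch of producing a compatible model file and uploading it (the training data, file name and display name are placeholders you would supply yourself)::

    import xgboost as xgb
    from google.cloud import aiplatform

    # dtrain is assumed to be an existing xgboost.DMatrix built from your data.
    booster = xgb.train(params={"max_depth": 3}, dtrain=dtrain)
    booster.save_model("model.bst")

    model = aiplatform.Model.upload_xgboost_model_file(
        model_file_path="model.bst",
        xgboost_version="1.4",
        display_name="my-xgboost-model",
    )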
+ """ # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#xgboost XGBOOST_SUPPORTED_VERSIONS = ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"] XGBOOST_CONTAINER_IMAGE_URI_TEMPLATE = ( @@ -2547,6 +2661,121 @@ def upload_scikit_learn_model_file( staging_bucket: Optional[str] = None, sync=True, ): + """Uploads a model and returns a Model representing the uploaded Model + resource. + + Note: This function is *experimental* and can be changed in the future. + + Example usage:: + + my_model = Model.upload_scikit_learn_model_file( + model_file_path="iris.sklearn_model.joblib" + ) + + Args: + model_file_path (str): Required. Local file path of the model. + sklearn_version (str): + Optional. The version of the Scikit-learn serving container. + Supported versions: ["0.20", "0.22", "0.23", "0.24"]. + If the version is not specified, the latest version is used. + display_name (str): + Optional. The display name of the Model. The name can be up to 128 + characters long and can be consist of any UTF-8 characters. + description (str): + The description of the model. + instance_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single instance, which + are used in + ``PredictRequest.instances``, + ``ExplainRequest.instances`` + and + ``BatchPredictionJob.input_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + parameters_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the parameters of prediction and + explanation via + ``PredictRequest.parameters``, + ``ExplainRequest.parameters`` + and + ``BatchPredictionJob.model_parameters``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform, if no parameters are supported it is set to an + empty string. Note: The URI given on output will be + immutable and probably different, including the URI scheme, + than the one given on input. The output URI will point to a + location where the user only has a read access. + prediction_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single prediction + produced by this Model, which are returned via + ``PredictResponse.predictions``, + ``ExplainResponse.explanations``, + and + ``BatchPredictionJob.output_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + explanation_metadata (explain.ExplanationMetadata): + Optional. Metadata describing the Model's input and output for explanation. + Both `explanation_metadata` and `explanation_parameters` must be + passed together when used. For more details, see + `Ref docs ` + explanation_parameters (explain.ExplanationParameters): + Optional. Parameters to configure explaining for Model's predictions. + For more details, see `Ref docs ` + project: Optional[str]=None, + Project to upload this model to. 
Overrides project set in + aiplatform.init. + location: Optional[str]=None, + Location to upload this model to. Overrides location set in + aiplatform.init. + credentials: Optional[auth_credentials.Credentials]=None, + Custom credentials to use to upload this model. Overrides credentials + set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the model. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, this Model and all sub-resources of this Model will be secured by this key. + + Overrides encryption_spec_key_name set in aiplatform.init. + staging_bucket (str): + Optional. Bucket to stage local model artifacts. Overrides + staging_bucket set in aiplatform.init. + Returns: + model: Instantiated representation of the uploaded model resource. + Raises: + ValueError: If only `explanation_metadata` or `explanation_parameters` + is specified. + """ # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#scikit-learn SKLEARN_SUPPORTED_VERSIONS = ["0.20", "0.22", "0.23", "0.24"] SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE = ( @@ -2640,6 +2869,123 @@ def upload_tensorflow_saved_model( staging_bucket: Optional[str] = None, sync=True, ): + """Uploads a model and returns a Model representing the uploaded Model + resource. + + Note: This function is *experimental* and can be changed in the future. + + Example usage:: + + my_model = Model.upload_scikit_learn_model_file( + model_file_path="iris.tensorflow_model.SavedModel" + ) + + Args: + saved_model_dir (str): Required. + Local directory of the Tensorflow SavedModel. + tensorflow_version (str): + Optional. The version of the Tensorflow serving container. + Supported versions: ["0.15", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6"]. + If the version is not specified, the latest version is used. + use_gpu (bool): Whether to use GPU for model serving. + display_name (str): + Optional. The display name of the Model. The name can be up to 128 + characters long and can be consist of any UTF-8 characters. + description (str): + The description of the model. + instance_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single instance, which + are used in + ``PredictRequest.instances``, + ``ExplainRequest.instances`` + and + ``BatchPredictionJob.input_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + parameters_schema_uri (str): + Optional. 
Points to a YAML file stored on Google Cloud + Storage describing the parameters of prediction and + explanation via + ``PredictRequest.parameters``, + ``ExplainRequest.parameters`` + and + ``BatchPredictionJob.model_parameters``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform, if no parameters are supported it is set to an + empty string. Note: The URI given on output will be + immutable and probably different, including the URI scheme, + than the one given on input. The output URI will point to a + location where the user only has a read access. + prediction_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single prediction + produced by this Model, which are returned via + ``PredictResponse.predictions``, + ``ExplainResponse.explanations``, + and + ``BatchPredictionJob.output_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + explanation_metadata (explain.ExplanationMetadata): + Optional. Metadata describing the Model's input and output for explanation. + Both `explanation_metadata` and `explanation_parameters` must be + passed together when used. For more details, see + `Ref docs ` + explanation_parameters (explain.ExplanationParameters): + Optional. Parameters to configure explaining for Model's predictions. + For more details, see `Ref docs ` + project: Optional[str]=None, + Project to upload this model to. Overrides project set in + aiplatform.init. + location: Optional[str]=None, + Location to upload this model to. Overrides location set in + aiplatform.init. + credentials: Optional[auth_credentials.Credentials]=None, + Custom credentials to use to upload this model. Overrides credentials + set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the model. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, this Model and all sub-resources of this Model will be secured by this key. + + Overrides encryption_spec_key_name set in aiplatform.init. + staging_bucket (str): + Optional. Bucket to stage local model artifacts. Overrides + staging_bucket set in aiplatform.init. + Returns: + model: Instantiated representation of the uploaded model resource. + Raises: + ValueError: If only `explanation_metadata` or `explanation_parameters` + is specified. 
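Similarly for TensorFlow, a sketch of exporting a SavedModel and uploading it (the toy Keras model and directory name are illustrative)::

    import tensorflow as tf
    from google.cloud import aiplatform

    keras_model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    keras_model.save("exported_model")  # writes saved_model.pb plus variables/

    model = aiplatform.Model.upload_tensorflow_saved_model(
        saved_model_dir="exported_model",
        tensorflow_version="2.6",
        use_gpu=False,
        display_name="my-tensorflow-model",
    )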
+ """ # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#tensorflow TENSORFLOW_SUPPORTED_VERSIONS = [ "0.15", From 8f61efb3a7903a6e0ef47d957f26ef3083581c7e Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 20 Oct 2021 02:12:05 -0700 Subject: [PATCH 07/36] Fixed the version variable in error messages --- google/cloud/aiplatform/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 8f05e2b04a..fdaf5cfada 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2586,7 +2586,7 @@ def upload_xgboost_model_file( if xgboost_version not in XGBOOST_SUPPORTED_VERSIONS: _LOGGER.error( - f"XGBoost version {version} is not supported. " + f"XGBoost version {xgboost_version} is not supported. " f"Supported versions: {XGBOOST_SUPPORTED_VERSIONS}" ) @@ -2793,7 +2793,7 @@ def upload_scikit_learn_model_file( if sklearn_version not in SKLEARN_SUPPORTED_VERSIONS: _LOGGER.error( - f"Scikit-learn version {version} is not supported. " + f"Scikit-learn version {sklearn_version} is not supported. " f"Supported versions: {SKLEARN_SUPPORTED_VERSIONS}" ) @@ -3007,7 +3007,7 @@ def upload_tensorflow_saved_model( if tensorflow_version not in TENSORFLOW_SUPPORTED_VERSIONS: _LOGGER.error( - f"Tensorflow version {version} is not supported. " + f"Tensorflow version {tensorflow_version} is not supported. " f"Supported versions: {TENSORFLOW_SUPPORTED_VERSIONS}" ) From 8c5448dad158f2fdd0a814002658335fb1c64283 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 4 Nov 2021 18:43:21 -0700 Subject: [PATCH 08/36] Only validate the model directory when using a built-in serving container --- google/cloud/aiplatform/models.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index fdaf5cfada..d3a51170e9 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -16,6 +16,7 @@ # import pathlib import proto +import re import shutil import tempfile from typing import Dict, List, NamedTuple, Optional, Sequence, Tuple, Union @@ -1714,22 +1715,24 @@ def upload( ) if artifact_uri and not artifact_uri.startswith("gs://"): - # Validating the model directory model_dir = pathlib.Path(artifact_uri) + # Validating the model directory if not model_dir.exists(): raise ValueError(f"artifact_uri path does not exist: '{artifact_uri}'") - if not model_dir.is_dir(): - raise ValueError( - f"artifact_uri path must be a directory: '{artifact_uri}'" - ) - if not any( - (model_dir / file_name).exists() - for file_name in _SUPPORTED_MODEL_FILE_NAMES - ): - raise ValueError( - "artifact_uri directory does not contain any supported model files. " - f"The upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'" - ) + PREBUILT_IMAGE_RE = "(us|europe|asia)-docker.pkg.dev/vertex-ai/prediction/" + if re.match(PREBUILT_IMAGE_RE, serving_container_image_uri): + if not model_dir.is_dir(): + raise ValueError( + f"artifact_uri path must be a directory: '{artifact_uri}'" + ) + if not any( + (model_dir / file_name).exists() + for file_name in _SUPPORTED_MODEL_FILE_NAMES + ): + raise ValueError( + "artifact_uri directory does not contain any supported model files. 
" + f"The upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'" + ) # Uploading the model staged_data_uri = utils.stage_local_data_in_gcs( From 4bbf2f00ccd844e5bae961c8fcd20c552e683da5 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 4 Nov 2021 18:51:11 -0700 Subject: [PATCH 09/36] Added more exception causes to the dostrings --- google/cloud/aiplatform/models.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index d3a51170e9..7883851558 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -1657,6 +1657,7 @@ def upload( Raises: ValueError: If only `explanation_metadata` or `explanation_parameters` is specified. + Also if model directory does not contain a supported model file. """ utils.validate_display_name(display_name) if labels: @@ -2569,6 +2570,7 @@ def upload_xgboost_model_file( Raises: ValueError: If only `explanation_metadata` or `explanation_parameters` is specified. + Also if model directory does not contain a supported model file. """ # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#xgboost XGBOOST_SUPPORTED_VERSIONS = ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"] @@ -2778,6 +2780,7 @@ def upload_scikit_learn_model_file( Raises: ValueError: If only `explanation_metadata` or `explanation_parameters` is specified. + Also if model directory does not contain a supported model file. """ # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#scikit-learn SKLEARN_SUPPORTED_VERSIONS = ["0.20", "0.22", "0.23", "0.24"] @@ -2988,6 +2991,7 @@ def upload_tensorflow_saved_model( Raises: ValueError: If only `explanation_metadata` or `explanation_parameters` is specified. + Also if model directory does not contain a supported model file. 
""" # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#tensorflow TENSORFLOW_SUPPORTED_VERSIONS = [ From 171e7379fa15e3904a5c80bd0b1c2d1ba0390ac9 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 4 Nov 2021 19:03:39 -0700 Subject: [PATCH 10/36] Cleaning up the temporary model directory --- google/cloud/aiplatform/models.py | 134 +++++++++++++++--------------- 1 file changed, 68 insertions(+), 66 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 7883851558..881b1882c6 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2595,6 +2595,16 @@ def upload_xgboost_model_file( f"Supported versions: {XGBOOST_SUPPORTED_VERSIONS}" ) + container_image_uri = XGBOOST_CONTAINER_IMAGE_URI_TEMPLATE.format( + registry=_get_container_registry( + location or aiplatform.initializer.global_config.location + ), + cpu_or_gpu="cpu", + version=xgboost_version.replace(".", "-"), + ) + + display_name = display_name or "XGBoost model" + model_file_path_obj = pathlib.Path(model_file_path) if not model_file_path_obj.is_file(): raise ValueError( @@ -2612,40 +2622,31 @@ def upload_xgboost_model_file( model_file_extension = ".bst" # Preparing model directory - prepared_model_dir = tempfile.mkdtemp() - prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( - "model" + model_file_extension - ) - shutil.copy(model_file_path_obj, prepared_model_file_path) - - container_image_uri = XGBOOST_CONTAINER_IMAGE_URI_TEMPLATE.format( - registry=_get_container_registry( - location or aiplatform.initializer.global_config.location - ), - cpu_or_gpu="cpu", - version=xgboost_version.replace(".", "-"), - ) - - display_name = display_name or "XGBoost model" + with tempfile.TemporaryDirectory() as prepared_model_dir_obj: + prepared_model_dir = prepared_model_dir_obj.name + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) - return aiplatform.Model.upload( - serving_container_image_uri=container_image_uri, - artifact_uri=prepared_model_dir, - display_name=display_name, - description=description, - instance_schema_uri=instance_schema_uri, - parameters_schema_uri=parameters_schema_uri, - prediction_schema_uri=prediction_schema_uri, - explanation_metadata=explanation_metadata, - explanation_parameters=explanation_parameters, - project=project, - location=location, - credentials=credentials, - labels=labels, - encryption_spec_key_name=encryption_spec_key_name, - staging_bucket=staging_bucket, - sync=sync, - ) + return aiplatform.Model.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=sync, + ) @staticmethod def upload_scikit_learn_model_file( @@ -2803,6 +2804,16 @@ def upload_scikit_learn_model_file( f"Supported versions: {SKLEARN_SUPPORTED_VERSIONS}" ) + container_image_uri = SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE.format( + registry=_get_container_registry( + location or aiplatform.initializer.global_config.location 
+ ), + cpu_or_gpu="cpu", + version=sklearn_version.replace(".", "-"), + ) + + display_name = display_name or "Scikit-learn model" + model_file_path_obj = pathlib.Path(model_file_path) if not model_file_path_obj.is_file(): raise ValueError( @@ -2820,40 +2831,31 @@ def upload_scikit_learn_model_file( model_file_extension = ".pkl" # Preparing model directory - prepared_model_dir = tempfile.mkdtemp() - prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( - "model" + model_file_extension - ) - shutil.copy(model_file_path_obj, prepared_model_file_path) - - container_image_uri = SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE.format( - registry=_get_container_registry( - location or aiplatform.initializer.global_config.location - ), - cpu_or_gpu="cpu", - version=sklearn_version.replace(".", "-"), - ) - - display_name = display_name or "Scikit-learn model" + with tempfile.TemporaryDirectory() as prepared_model_dir_obj: + prepared_model_dir = prepared_model_dir_obj.name + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) - return aiplatform.Model.upload( - serving_container_image_uri=container_image_uri, - artifact_uri=prepared_model_dir, - display_name=display_name, - description=description, - instance_schema_uri=instance_schema_uri, - parameters_schema_uri=parameters_schema_uri, - prediction_schema_uri=prediction_schema_uri, - explanation_metadata=explanation_metadata, - explanation_parameters=explanation_parameters, - project=project, - location=location, - credentials=credentials, - labels=labels, - encryption_spec_key_name=encryption_spec_key_name, - staging_bucket=staging_bucket, - sync=sync, - ) + return aiplatform.Model.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=sync, + ) @staticmethod def upload_tensorflow_saved_model( From 7f5407ed21cc18a8f7495e9d896be6da8ebe8ebf Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Thu, 4 Nov 2021 19:16:56 -0700 Subject: [PATCH 11/36] Added fallback to default credentials in `upload_to_gcs` --- google/cloud/aiplatform/utils/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index 36b2f50348..2c402899e7 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -617,6 +617,9 @@ def upload_to_gcs( if not source_path_obj.exists(): raise RuntimeError(f"Source path does not exist: {source_path}") + project = project or initializer.global_config.project + credentials = credentials or initializer.global_config.credentials + storage_client = storage.Client(project=project, credentials=credentials) if source_path_obj.is_dir(): source_file_paths = glob.glob( From 33942da463fe4a2fc377391be71915adeb3078dc Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 12 Nov 2021 17:29:51 -0800 Subject: [PATCH 12/36] Moved the stage_local_data_in_gcs and upload_to_gcs functions to gcs_utils module --- 
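After this refactor, callers reach the helpers through the new module; a sketch, with the bucket and paths illustrative::

    from google.cloud.aiplatform.utils import gcs_utils

    staged_uri = gcs_utils.stage_local_data_in_gcs(data_path="/tmp/trained_model/")
    gcs_utils.upload_to_gcs(
        source_path="/tmp/metrics.json",
        destination_uri="gs://my-bucket/metrics/metrics.json",
    )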
google/cloud/aiplatform/models.py | 4 +- google/cloud/aiplatform/utils/__init__.py | 123 ----------------- google/cloud/aiplatform/utils/gcs_utils.py | 149 +++++++++++++++++++++ 3 files changed, 151 insertions(+), 125 deletions(-) create mode 100644 google/cloud/aiplatform/utils/gcs_utils.py diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 881b1882c6..b0ba46d83d 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -32,6 +32,7 @@ from google.cloud.aiplatform import jobs from google.cloud.aiplatform import models from google.cloud.aiplatform import utils +from google.cloud.aiplatform.utils import gcs_utils from google.cloud.aiplatform.compat.services import endpoint_service_client @@ -1736,7 +1737,7 @@ def upload( ) # Uploading the model - staged_data_uri = utils.stage_local_data_in_gcs( + staged_data_uri = gcs_utils.stage_local_data_in_gcs( data_path=str(model_dir), staging_gcs_dir=staging_bucket, project=project, @@ -2803,7 +2804,6 @@ def upload_scikit_learn_model_file( f"Scikit-learn version {sklearn_version} is not supported. " f"Supported versions: {SKLEARN_SUPPORTED_VERSIONS}" ) - container_image_uri = SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE.format( registry=_get_container_registry( location or aiplatform.initializer.global_config.location diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index 2c402899e7..d239b9b441 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -18,7 +18,6 @@ import abc import datetime -import glob import pathlib from collections import namedtuple import logging @@ -57,8 +56,6 @@ accelerator_type as gca_accelerator_type, ) -_logger = logging.getLogger(__name__) - VertexAiServiceClient = TypeVar( "VertexAiServiceClient", # v1beta1 @@ -594,123 +591,3 @@ def _timestamped_copy_to_gcs( gcs_path = "".join(["gs://", "/".join([blob.bucket.name, blob.name])]) return gcs_path - - -def upload_to_gcs( - source_path: str, - destination_uri: str, - project: Optional[str] = None, - credentials: Optional[auth_credentials.Credentials] = None, -): - """Uploads local files to GCS. - - After upload the `destination_uri` will contain the same data as the `source_path`. - - Args: - source_path: Required. Path of the local data to copy to GCS. - destination_uri: Required. GCS URI where the data should be uploaded. - project: Optional. Google Cloud Project that contains the staging bucket. - credentials: The custom credentials to use when making API calls. - If not provided, default credentials will be used. 
- """ - source_path_obj = pathlib.Path(source_path) - if not source_path_obj.exists(): - raise RuntimeError(f"Source path does not exist: {source_path}") - - project = project or initializer.global_config.project - credentials = credentials or initializer.global_config.credentials - - storage_client = storage.Client(project=project, credentials=credentials) - if source_path_obj.is_dir(): - source_file_paths = glob.glob( - pathname=str(source_path_obj / "**"), recursive=True - ) - for source_file_path in source_file_paths: - source_file_path_obj = pathlib.Path(source_file_path) - if source_file_path_obj.is_dir(): - continue - source_file_relative_path_obj = source_file_path_obj.relative_to( - source_path_obj - ) - source_file_relative_posix_path = source_file_relative_path_obj.as_posix() - destination_file_uri = ( - destination_uri.rstrip("/") + "/" + source_file_relative_posix_path - ) - _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') - destination_blob = storage.Blob.from_string( - destination_file_uri, client=storage_client - ) - destination_blob.upload_from_filename(filename=source_file_path) - else: - source_file_path = source_path - destination_file_uri = destination_uri - _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') - destination_blob = storage.Blob.from_string( - destination_file_uri, client=storage_client - ) - destination_blob.upload_from_filename(filename=source_file_path) - - -def stage_local_data_in_gcs( - data_path: str, - staging_gcs_dir: Optional[str] = None, - project: Optional[str] = None, - location: Optional[str] = None, - credentials: Optional[auth_credentials.Credentials] = None, -) -> str: - """Stages a local data in GCS. - - The file copied to GCS is the name of the local file prepended with an - "aiplatform-{timestamp}-" string. - - Args: - data_path: Required. Path of the local data to copy to GCS. - staging_gcs_dir: - Optional. Google Cloud Storage bucket to be used for data staging. - project: Optional. Google Cloud Project that contains the staging bucket. - location: Optional. Google Cloud location to use for the staging bucket. - credentials: The custom credentials to use when making API calls. - If not provided, default credentials will be used. - - Returns: - Google Cloud Storage URI of the staged data. - """ - data_path_obj = pathlib.Path(data_path) - - if not data_path_obj.exists(): - raise RuntimeError(f"Local data does not exist: data_path='{data_path}'") - - staging_gcs_dir = staging_gcs_dir or initializer.global_config.staging_bucket - if not staging_gcs_dir: - project = project or initializer.global_config.project - location = location or initializer.global_config.location - # Creating the bucket if it does not exist. - # Currently we only do this when staging_gcs_dir is not specified. 
- staging_bucket_name = project + "-staging" - client = storage.Client(project=project, credentials=credentials) - staging_bucket = storage.Bucket(client=client, name=staging_bucket_name) - if not staging_bucket.exists(): - _logger.info(f'Creating staging GCS bucket "{staging_bucket_name}"') - staging_bucket = client.create_bucket( - bucket_or_name=staging_bucket, project=project, location=location, - ) - staging_gcs_dir = "gs://" + staging_bucket_name - - timestamp = datetime.datetime.now().isoformat(sep="-", timespec="milliseconds") - staging_gcs_subdir = ( - staging_gcs_dir.rstrip("/") + "/vertex_ai_auto_staging/" + timestamp - ) - - staged_data_uri = staging_gcs_subdir - if data_path_obj.is_file(): - staged_data_uri = staging_gcs_subdir + "/" + data_path_obj.name - - _logger.info(f'Uploading "{data_path}" to "{staged_data_uri}"') - upload_to_gcs( - source_path=data_path, - destination_uri=staged_data_uri, - project=project, - credentials=credentials, - ) - - return staged_data_uri diff --git a/google/cloud/aiplatform/utils/gcs_utils.py b/google/cloud/aiplatform/utils/gcs_utils.py new file mode 100644 index 0000000000..ed4e231ebd --- /dev/null +++ b/google/cloud/aiplatform/utils/gcs_utils.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import datetime +import glob +import pathlib +from typing import Optional + +from google.auth import credentials as auth_credentials +from google.cloud import storage + +from google.cloud.aiplatform import initializer + + +_logger = logging.getLogger(__name__) + + +def upload_to_gcs( + source_path: str, + destination_uri: str, + project: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, +): + """Uploads local files to GCS. + + After upload the `destination_uri` will contain the same data as the `source_path`. + + Args: + source_path: Required. Path of the local data to copy to GCS. + destination_uri: Required. GCS URI where the data should be uploaded. + project: Optional. Google Cloud Project that contains the staging bucket. + credentials: The custom credentials to use when making API calls. + If not provided, default credentials will be used. 
+ """ + source_path_obj = pathlib.Path(source_path) + if not source_path_obj.exists(): + raise RuntimeError(f"Source path does not exist: {source_path}") + + project = project or initializer.global_config.project + credentials = credentials or initializer.global_config.credentials + + storage_client = storage.Client(project=project, credentials=credentials) + if source_path_obj.is_dir(): + source_file_paths = glob.glob( + pathname=str(source_path_obj / "**"), recursive=True + ) + for source_file_path in source_file_paths: + source_file_path_obj = pathlib.Path(source_file_path) + if source_file_path_obj.is_dir(): + continue + source_file_relative_path_obj = source_file_path_obj.relative_to( + source_path_obj + ) + source_file_relative_posix_path = source_file_relative_path_obj.as_posix() + destination_file_uri = ( + destination_uri.rstrip("/") + "/" + source_file_relative_posix_path + ) + _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') + destination_blob = storage.Blob.from_string( + destination_file_uri, client=storage_client + ) + destination_blob.upload_from_filename(filename=source_file_path) + else: + source_file_path = source_path + destination_file_uri = destination_uri + _logger.debug(f'Uploading "{source_file_path}" to "{destination_file_uri}"') + destination_blob = storage.Blob.from_string( + destination_file_uri, client=storage_client + ) + destination_blob.upload_from_filename(filename=source_file_path) + + +def stage_local_data_in_gcs( + data_path: str, + staging_gcs_dir: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, +) -> str: + """Stages a local data in GCS. + + The file copied to GCS is the name of the local file prepended with an + "aiplatform-{timestamp}-" string. + + Args: + data_path: Required. Path of the local data to copy to GCS. + staging_gcs_dir: + Optional. Google Cloud Storage bucket to be used for data staging. + project: Optional. Google Cloud Project that contains the staging bucket. + location: Optional. Google Cloud location to use for the staging bucket. + credentials: The custom credentials to use when making API calls. + If not provided, default credentials will be used. + + Returns: + Google Cloud Storage URI of the staged data. + """ + data_path_obj = pathlib.Path(data_path) + + if not data_path_obj.exists(): + raise RuntimeError(f"Local data does not exist: data_path='{data_path}'") + + staging_gcs_dir = staging_gcs_dir or initializer.global_config.staging_bucket + if not staging_gcs_dir: + project = project or initializer.global_config.project + location = location or initializer.global_config.location + # Creating the bucket if it does not exist. + # Currently we only do this when staging_gcs_dir is not specified. 
+ staging_bucket_name = project + "-staging" + client = storage.Client(project=project, credentials=credentials) + staging_bucket = storage.Bucket(client=client, name=staging_bucket_name) + if not staging_bucket.exists(): + _logger.info(f'Creating staging GCS bucket "{staging_bucket_name}"') + staging_bucket = client.create_bucket( + bucket_or_name=staging_bucket, project=project, location=location, + ) + staging_gcs_dir = "gs://" + staging_bucket_name + + timestamp = datetime.datetime.now().isoformat(sep="-", timespec="milliseconds") + staging_gcs_subdir = ( + staging_gcs_dir.rstrip("/") + "/vertex_ai_auto_staging/" + timestamp + ) + + staged_data_uri = staging_gcs_subdir + if data_path_obj.is_file(): + staged_data_uri = staging_gcs_subdir + "/" + data_path_obj.name + + _logger.info(f'Uploading "{data_path}" to "{staged_data_uri}"') + upload_to_gcs( + source_path=data_path, + destination_uri=staged_data_uri, + project=project, + credentials=credentials, + ) + + return staged_data_uri From 5febd565f03cb3bab7e073688cc638a714d378ef Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 12 Nov 2021 17:38:10 -0800 Subject: [PATCH 13/36] Added the "Raises" sections to function docstrings --- google/cloud/aiplatform/utils/gcs_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/google/cloud/aiplatform/utils/gcs_utils.py b/google/cloud/aiplatform/utils/gcs_utils.py index ed4e231ebd..7c4c34d4f6 100644 --- a/google/cloud/aiplatform/utils/gcs_utils.py +++ b/google/cloud/aiplatform/utils/gcs_utils.py @@ -45,6 +45,10 @@ def upload_to_gcs( project: Optional. Google Cloud Project that contains the staging bucket. credentials: The custom credentials to use when making API calls. If not provided, default credentials will be used. + + Raises: + RuntimeError: When source_path does not exist. + GoogleCloudError: When the upload process fails. """ source_path_obj = pathlib.Path(source_path) if not source_path_obj.exists(): @@ -107,6 +111,10 @@ def stage_local_data_in_gcs( Returns: Google Cloud Storage URI of the staged data. + + Raises: + RuntimeError: When source_path does not exist. + GoogleCloudError: When the upload process fails. """ data_path_obj = pathlib.Path(data_path) From 439d413ec2a0ad83d2c4c66b94acc18a897a518f Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 12 Nov 2021 17:44:06 -0800 Subject: [PATCH 14/36] When user-provided version is not recognized, show warning message instead of error message --- google/cloud/aiplatform/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index b0ba46d83d..a74bd3ada5 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2591,7 +2591,7 @@ def upload_xgboost_model_file( _LOGGER.info(f"Using the {xgboost_version} version of XGBoost.") if xgboost_version not in XGBOOST_SUPPORTED_VERSIONS: - _LOGGER.error( + _LOGGER.warning( f"XGBoost version {xgboost_version} is not supported. " f"Supported versions: {XGBOOST_SUPPORTED_VERSIONS}" ) @@ -2800,7 +2800,7 @@ def upload_scikit_learn_model_file( _LOGGER.info(f"Using the {sklearn_version} version of Scikit-learn.") if sklearn_version not in SKLEARN_SUPPORTED_VERSIONS: - _LOGGER.error( + _LOGGER.warning( f"Scikit-learn version {sklearn_version} is not supported. 
" f"Supported versions: {SKLEARN_SUPPORTED_VERSIONS}" ) @@ -3015,7 +3015,7 @@ def upload_tensorflow_saved_model( _LOGGER.info(f"Using the {tensorflow_version} version of Tensorflow.") if tensorflow_version not in TENSORFLOW_SUPPORTED_VERSIONS: - _LOGGER.error( + _LOGGER.warning( f"Tensorflow version {tensorflow_version} is not supported. " f"Supported versions: {TENSORFLOW_SUPPORTED_VERSIONS}" ) From 1064243dc0a68527cb328b8c59f8b16ab8cbb0be Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 12 Nov 2021 18:02:56 -0800 Subject: [PATCH 15/36] Added docstring for _get_container_registry --- google/cloud/aiplatform/models.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index a74bd3ada5..f93e4d9649 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -3052,6 +3052,14 @@ def upload_tensorflow_saved_model( def _get_container_registry(location: Optional[str] = None,) -> str: + """Gets container registry domain based on location. + + Args: + location: Google Cloud location + + Returns: + Google Container Registry domain. + """ location = location or "us-" if location.startswith("us-"): return "us-docker.pkg.dev" From 03f3e809dfe1e4806bd34dc539263b36954d6856 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 12 Nov 2021 18:48:47 -0800 Subject: [PATCH 16/36] Added missing import --- google/cloud/aiplatform/utils/gcs_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/aiplatform/utils/gcs_utils.py b/google/cloud/aiplatform/utils/gcs_utils.py index 7c4c34d4f6..f52fd1015c 100644 --- a/google/cloud/aiplatform/utils/gcs_utils.py +++ b/google/cloud/aiplatform/utils/gcs_utils.py @@ -17,6 +17,7 @@ import datetime import glob +import logging import pathlib from typing import Optional From 31248d1746790e816b6efa2567504a6d07495da1 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Mon, 15 Nov 2021 22:51:07 -0800 Subject: [PATCH 17/36] fix: Fixed code after switching from mkdtemp to TemporaryDirectory TemporaryDirectory returns object with `.name` when called directly, but returns a path string when used as a context. --- google/cloud/aiplatform/models.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index f93e4d9649..968186be3d 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2623,8 +2623,7 @@ def upload_xgboost_model_file( model_file_extension = ".bst" # Preparing model directory - with tempfile.TemporaryDirectory() as prepared_model_dir_obj: - prepared_model_dir = prepared_model_dir_obj.name + with tempfile.TemporaryDirectory() as prepared_model_dir: prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( "model" + model_file_extension ) @@ -2831,8 +2830,7 @@ def upload_scikit_learn_model_file( model_file_extension = ".pkl" # Preparing model directory - with tempfile.TemporaryDirectory() as prepared_model_dir_obj: - prepared_model_dir = prepared_model_dir_obj.name + with tempfile.TemporaryDirectory() as prepared_model_dir: prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( "model" + model_file_extension ) From 9dcc39fdf42e10c44584e99a71d25111a485edb9 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Mon, 15 Nov 2021 23:05:59 -0800 Subject: [PATCH 18/36] Made the staging bucket name location-specific The buckets that we create are regional. This prevents errors when some service required regional bucket. 
E.g. "FailedPrecondition: 400 The Cloud Storage bucket of `gs://...` is in location `us`. It must be in the same regional location as the service location `us-central1`." We are making the bucket name region-specific since the bucket is regional. --- google/cloud/aiplatform/utils/gcs_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/utils/gcs_utils.py b/google/cloud/aiplatform/utils/gcs_utils.py index f52fd1015c..b6308192f2 100644 --- a/google/cloud/aiplatform/utils/gcs_utils.py +++ b/google/cloud/aiplatform/utils/gcs_utils.py @@ -128,7 +128,11 @@ def stage_local_data_in_gcs( location = location or initializer.global_config.location # Creating the bucket if it does not exist. # Currently we only do this when staging_gcs_dir is not specified. - staging_bucket_name = project + "-staging" + # The buckets that we create are regional. + # This prevents errors when some service required regional bucket. + # E.g. "FailedPrecondition: 400 The Cloud Storage bucket of `gs://...` is in location `us`. It must be in the same regional location as the service location `us-central1`." + # We are making the bucket name region-specific since the bucket is regional. + staging_bucket_name = project + "-staging-" + location client = storage.Client(project=project, credentials=credentials) staging_bucket = storage.Bucket(client=client, name=staging_bucket_name) if not staging_bucket.exists(): From 24006fc37d42f952b24230da37d771a28b56e2e1 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 16 Nov 2021 00:51:12 -0800 Subject: [PATCH 19/36] Reverted the automatic temporary directory cleanup We cannot clean up the directory immediately after calling Model.upload since that call may be asynchronous and return before the model file has been read. The temporary data will be automatically cleaned up by the system later. --- google/cloud/aiplatform/models.py | 98 ++++++++++++++++--------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 968186be3d..d02471c1d9 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2623,30 +2623,33 @@ def upload_xgboost_model_file( model_file_extension = ".bst" # Preparing model directory - with tempfile.TemporaryDirectory() as prepared_model_dir: - prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( - "model" + model_file_extension - ) - shutil.copy(model_file_path_obj, prepared_model_file_path) + # We cannot clean up the directory immediately after calling Model.upload since + # that call may be asynchronous and return before the model file has been read. + # The temporary data will be automatically cleaned up by the system later. 
+ prepared_model_dir = tempfile.mkdtemp() + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) - return aiplatform.Model.upload( - serving_container_image_uri=container_image_uri, - artifact_uri=prepared_model_dir, - display_name=display_name, - description=description, - instance_schema_uri=instance_schema_uri, - parameters_schema_uri=parameters_schema_uri, - prediction_schema_uri=prediction_schema_uri, - explanation_metadata=explanation_metadata, - explanation_parameters=explanation_parameters, - project=project, - location=location, - credentials=credentials, - labels=labels, - encryption_spec_key_name=encryption_spec_key_name, - staging_bucket=staging_bucket, - sync=sync, - ) + return aiplatform.Model.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=sync, + ) @staticmethod def upload_scikit_learn_model_file( @@ -2830,30 +2833,33 @@ def upload_scikit_learn_model_file( model_file_extension = ".pkl" # Preparing model directory - with tempfile.TemporaryDirectory() as prepared_model_dir: - prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( - "model" + model_file_extension - ) - shutil.copy(model_file_path_obj, prepared_model_file_path) + # We cannot clean up the directory immediately after calling Model.upload since + # that call may be asynchronous and return before the model file has been read. + # The temporary data will be automatically cleaned up by the system later. 
+ prepared_model_dir = tempfile.mkdtemp() + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) - return aiplatform.Model.upload( - serving_container_image_uri=container_image_uri, - artifact_uri=prepared_model_dir, - display_name=display_name, - description=description, - instance_schema_uri=instance_schema_uri, - parameters_schema_uri=parameters_schema_uri, - prediction_schema_uri=prediction_schema_uri, - explanation_metadata=explanation_metadata, - explanation_parameters=explanation_parameters, - project=project, - location=location, - credentials=credentials, - labels=labels, - encryption_spec_key_name=encryption_spec_key_name, - staging_bucket=staging_bucket, - sync=sync, - ) + return aiplatform.Model.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=sync, + ) @staticmethod def upload_tensorflow_saved_model( From e8ecce797a3857ca604a22145ffb8df37f1938e7 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 16 Nov 2021 01:02:14 -0800 Subject: [PATCH 20/36] Added unit tests for the Model.upload_* methods --- tests/unit/aiplatform/test_models.py | 163 +++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index cfd2669723..48ba947488 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -17,6 +17,7 @@ import importlib from concurrent import futures +import pathlib import pytest from unittest import mock from unittest.mock import patch @@ -423,6 +424,16 @@ def create_client_mock(): yield create_client_mock +@pytest.fixture +def mock_storage_blob_upload_from_filename(): + with patch( + "google.cloud.storage.Blob.upload_from_filename" + ) as mock_blob_upload_from_filename, patch( + "google.cloud.storage.Bucket.exists", return_value=True + ): + yield mock_blob_upload_from_filename + + class TestModel: def setup_method(self): importlib.reload(initializer) @@ -1430,3 +1441,155 @@ def test_get_and_return_subclass_not_found(self): fr"{_TEST_PIPELINE_RESOURCE_NAME}" ) ) + + @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "model_file_name", + ["my_model.xgb", "my_model.pkl", "my_model.joblib", "my_model.bst"], + ) + def test_upload_xgboost_model_file_uploads_and_gets_model( + self, + tmp_path: pathlib.Path, + model_file_name: str, + mock_storage_blob_upload_from_filename, + upload_model_mock, + get_model_mock, + sync: bool, + ): + model_file_path = tmp_path / model_file_name + model_file_path.touch() + + my_model = models.Model.upload_xgboost_model_file( + model_file_path=str(model_file_path), + xgboost_version="1.4", + display_name=_TEST_MODEL_NAME, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + sync=sync, + ) + + if not sync: + my_model.wait() + + upload_model_mock.assert_called_once() + upload_model_call_kwargs = upload_model_mock.call_args.kwargs + upload_model_model = upload_model_call_kwargs["model"] + + # 
Verifying the container image selection + assert ( + upload_model_model.container_spec.image_uri + == "us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-4:latest" + ) + + # Verifying the staging bucket name generation + assert upload_model_model.artifact_uri.startswith( + f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + ) + assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri + + # Verifying that the model was renamed to a file name that is acceptable for Model.upload + staged_model_file_path = mock_storage_blob_upload_from_filename.call_args.kwargs[ + "filename" + ] + staged_model_file_name = staged_model_file_path.split("/")[-1] + assert staged_model_file_name in ["model.bst", "model.pkl", "model.joblib"] + + @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "model_file_name", ["my_model.pkl", "my_model.joblib"], + ) + def test_upload_scikit_learn_model_file_uploads_and_gets_model( + self, + tmp_path: pathlib.Path, + model_file_name: str, + mock_storage_blob_upload_from_filename, + upload_model_mock, + get_model_mock, + sync: bool, + ): + model_file_path = tmp_path / model_file_name + model_file_path.touch() + + my_model = models.Model.upload_scikit_learn_model_file( + model_file_path=str(model_file_path), + sklearn_version="0.24", + display_name=_TEST_MODEL_NAME, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + sync=sync, + ) + + if not sync: + my_model.wait() + + upload_model_mock.assert_called_once() + upload_model_call_kwargs = upload_model_mock.call_args.kwargs + upload_model_model = upload_model_call_kwargs["model"] + + # Verifying the container image selection + assert ( + upload_model_model.container_spec.image_uri + == "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest" + ) + + # Verifying the staging bucket name generation + assert upload_model_model.artifact_uri.startswith( + f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + ) + assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri + + # Verifying that the model was renamed to a file name that is acceptable for Model.upload + staged_model_file_path = mock_storage_blob_upload_from_filename.call_args.kwargs[ + "filename" + ] + staged_model_file_name = staged_model_file_path.split("/")[-1] + assert staged_model_file_name in ["model.pkl", "model.joblib"] + + @pytest.mark.parametrize("sync", [True, False]) + def test_upload_tensorflow_saved_model_uploads_and_gets_model( + self, + tmp_path: pathlib.Path, + mock_storage_blob_upload_from_filename, + upload_model_mock, + get_model_mock, + sync: bool, + ): + saved_model_dir = tmp_path / "saved_model" + saved_model_dir.mkdir() + (saved_model_dir / "saved_model.pb").touch() + + my_model = models.Model.upload_tensorflow_saved_model( + saved_model_dir=str(saved_model_dir), + tensorflow_version="2.6", + use_gpu=True, + display_name=_TEST_MODEL_NAME, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + sync=sync, + ) + + if not sync: + my_model.wait() + + upload_model_mock.assert_called_once() + upload_model_call_kwargs = upload_model_mock.call_args.kwargs + upload_model_model = upload_model_call_kwargs["model"] + + # Verifying the container image selection + assert ( + upload_model_model.container_spec.image_uri + == "us-docker.pkg.dev/vertex-ai/prediction/tf2-gpu.2-6:latest" + ) + + # Verifying the staging bucket name generation + assert upload_model_model.artifact_uri.startswith( + f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + ) + assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri + + 
# Verifying that the model files were uploaded + staged_model_file_path = mock_storage_blob_upload_from_filename.call_args.kwargs[ + "filename" + ] + staged_model_file_name = staged_model_file_path.split("/")[-1] + assert staged_model_file_name in ["saved_model.pb", "saved_model.pbtxt"] From b521bdeb2ea9db2bd396bc8cf6a08bb8d8d80765 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 16 Nov 2021 01:11:06 -0800 Subject: [PATCH 21/36] Added unit test for Model.upload data staging feature --- tests/unit/aiplatform/test_models.py | 58 ++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 48ba947488..e73398d3e3 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -1494,6 +1494,64 @@ def test_upload_xgboost_model_file_uploads_and_gets_model( staged_model_file_name = staged_model_file_path.split("/")[-1] assert staged_model_file_name in ["model.bst", "model.pkl", "model.joblib"] + @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "model_file_name", + [ + "model.bst", + "model.pkl", + "model.joblib", + "saved_model.pb", + "saved_model.pbtxt", + ], + ) + def test_upload_stages_data_uploads_and_gets_model( + self, + tmp_path: pathlib.Path, + model_file_name: str, + mock_storage_blob_upload_from_filename, + upload_model_mock, + get_model_mock, + sync: bool, + ): + model_file_path = tmp_path / model_file_name + model_file_path.touch() + + my_model = models.Model.upload( + artifact_uri=str(tmp_path), + serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-4:latest", + display_name=_TEST_MODEL_NAME, + project=_TEST_PROJECT, + location=_TEST_LOCATION, + sync=sync, + ) + + if not sync: + my_model.wait() + + upload_model_mock.assert_called_once() + upload_model_call_kwargs = upload_model_mock.call_args.kwargs + upload_model_model = upload_model_call_kwargs["model"] + + # Verifying the staging bucket name generation + assert upload_model_model.artifact_uri.startswith( + f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + ) + assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri + + # Verifying that the model was renamed to a file name that is acceptable for Model.upload + staged_model_file_path = mock_storage_blob_upload_from_filename.call_args.kwargs[ + "filename" + ] + staged_model_file_name = staged_model_file_path.split("/")[-1] + assert staged_model_file_name in [ + "model.bst", + "model.pkl", + "model.joblib", + "saved_model.pb", + "saved_model.pbtxt", + ] + @pytest.mark.parametrize("sync", [True, False]) @pytest.mark.parametrize( "model_file_name", ["my_model.pkl", "my_model.joblib"], From b9642248a875644d20995907570459835b6b4e6e Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Nov 2021 15:29:29 -0800 Subject: [PATCH 22/36] Added new container image versions --- google/cloud/aiplatform/models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index d02471c1d9..101df646e0 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2685,7 +2685,7 @@ def upload_scikit_learn_model_file( model_file_path (str): Required. Local file path of the model. sklearn_version (str): Optional. The version of the Scikit-learn serving container. - Supported versions: ["0.20", "0.22", "0.23", "0.24"]. + Supported versions: ["0.20", "0.22", "0.23", "0.24", "1.0"]. 
If the version is not specified, the latest version is used. display_name (str): Optional. The display name of the Model. The name can be up to 128 @@ -2787,7 +2787,7 @@ def upload_scikit_learn_model_file( Also if model directory does not contain a supported model file. """ # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#scikit-learn - SKLEARN_SUPPORTED_VERSIONS = ["0.20", "0.22", "0.23", "0.24"] + SKLEARN_SUPPORTED_VERSIONS = ["0.20", "0.22", "0.23", "0.24", "1.0"] SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE = ( "{registry}/vertex-ai/prediction/sklearn-{cpu_or_gpu}.{version}:latest" ) @@ -2897,7 +2897,7 @@ def upload_tensorflow_saved_model( Local directory of the Tensorflow SavedModel. tensorflow_version (str): Optional. The version of the Tensorflow serving container. - Supported versions: ["0.15", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6"]. + Supported versions: ["0.15", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"]. If the version is not specified, the latest version is used. use_gpu (bool): Whether to use GPU for model serving. display_name (str): @@ -3008,6 +3008,7 @@ def upload_tensorflow_saved_model( "2.4", "2.5", "2.6", + "2.7", ] TENSORFLOW_CONTAINER_IMAGE_URI_TEMPLATE = ( "{registry}/vertex-ai/prediction/tf{tf2_or_1}-{cpu_or_gpu}.{version}:latest" From 6039548a64649f7a5bf24c888ad394a8f8c23573 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Nov 2021 15:32:27 -0800 Subject: [PATCH 23/36] Moved the default model display names to the function signatures --- google/cloud/aiplatform/models.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 101df646e0..e46bd86187 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2443,7 +2443,7 @@ def export_model( def upload_xgboost_model_file( model_file_path: str, xgboost_version: Optional[str] = None, - display_name: Optional[str] = None, + display_name: Optional[str] = "XGBoost model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, @@ -2604,8 +2604,6 @@ def upload_xgboost_model_file( version=xgboost_version.replace(".", "-"), ) - display_name = display_name or "XGBoost model" - model_file_path_obj = pathlib.Path(model_file_path) if not model_file_path_obj.is_file(): raise ValueError( @@ -2655,7 +2653,7 @@ def upload_xgboost_model_file( def upload_scikit_learn_model_file( model_file_path: str, sklearn_version: Optional[str] = None, - display_name: Optional[str] = None, + display_name: Optional[str] = "Scikit-learn model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, @@ -2814,8 +2812,6 @@ def upload_scikit_learn_model_file( version=sklearn_version.replace(".", "-"), ) - display_name = display_name or "Scikit-learn model" - model_file_path_obj = pathlib.Path(model_file_path) if not model_file_path_obj.is_file(): raise ValueError( @@ -2866,7 +2862,7 @@ def upload_tensorflow_saved_model( saved_model_dir: str, tensorflow_version: Optional[str] = None, use_gpu: bool = False, - display_name: Optional[str] = None, + display_name: Optional[str] = "Tensorflow model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, @@ -3034,8 +3030,6 @@ def upload_tensorflow_saved_model( version=tensorflow_version.replace(".", "-"), ) - display_name = display_name or 
"Tensorflow model" - return aiplatform.Model.upload( serving_container_image_uri=container_image_uri, artifact_uri=saved_model_dir, From 079e6e9414962903daf7d109555b42d57c8bdcab Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Nov 2021 15:39:54 -0800 Subject: [PATCH 24/36] Moved the default container versions to the function definitions --- google/cloud/aiplatform/models.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index e46bd86187..a0373b9d4b 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2442,7 +2442,7 @@ def export_model( @staticmethod def upload_xgboost_model_file( model_file_path: str, - xgboost_version: Optional[str] = None, + xgboost_version: Optional[str] = "1.4", display_name: Optional[str] = "XGBoost model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, @@ -2585,10 +2585,7 @@ def upload_xgboost_model_file( ".bst", ] - if xgboost_version is None: - # Using the latest version - xgboost_version = XGBOOST_SUPPORTED_VERSIONS[-1] - _LOGGER.info(f"Using the {xgboost_version} version of XGBoost.") + _LOGGER.info(f"Using the {xgboost_version} version of XGBoost.") if xgboost_version not in XGBOOST_SUPPORTED_VERSIONS: _LOGGER.warning( @@ -2652,7 +2649,7 @@ def upload_xgboost_model_file( @staticmethod def upload_scikit_learn_model_file( model_file_path: str, - sklearn_version: Optional[str] = None, + sklearn_version: Optional[str] = "1.0", display_name: Optional[str] = "Scikit-learn model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, @@ -2794,10 +2791,7 @@ def upload_scikit_learn_model_file( ".joblib", ] - if sklearn_version is None: - # Using the latest version - sklearn_version = SKLEARN_SUPPORTED_VERSIONS[-1] - _LOGGER.info(f"Using the {sklearn_version} version of Scikit-learn.") + _LOGGER.info(f"Using the {sklearn_version} version of Scikit-learn.") if sklearn_version not in SKLEARN_SUPPORTED_VERSIONS: _LOGGER.warning( @@ -2860,7 +2854,7 @@ def upload_scikit_learn_model_file( @staticmethod def upload_tensorflow_saved_model( saved_model_dir: str, - tensorflow_version: Optional[str] = None, + tensorflow_version: Optional[str] = "2.7", use_gpu: bool = False, display_name: Optional[str] = "Tensorflow model", description: Optional[str] = None, @@ -3010,10 +3004,7 @@ def upload_tensorflow_saved_model( "{registry}/vertex-ai/prediction/tf{tf2_or_1}-{cpu_or_gpu}.{version}:latest" ) - if tensorflow_version is None: - # Using the latest version - tensorflow_version = TENSORFLOW_SUPPORTED_VERSIONS[-1] - _LOGGER.info(f"Using the {tensorflow_version} version of Tensorflow.") + _LOGGER.info(f"Using the {tensorflow_version} version of Tensorflow.") if tensorflow_version not in TENSORFLOW_SUPPORTED_VERSIONS: _LOGGER.warning( From 73a48681fe6e58fa7930983c9c113a6a7bd5c7e0 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 23 Nov 2021 19:45:10 -0800 Subject: [PATCH 25/36] Added model upload integration test --- tests/system/aiplatform/test_model_upload.py | 59 ++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 tests/system/aiplatform/test_model_upload.py diff --git a/tests/system/aiplatform/test_model_upload.py b/tests/system/aiplatform/test_model_upload.py new file mode 100644 index 0000000000..cad6572879 --- /dev/null +++ b/tests/system/aiplatform/test_model_upload.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import tempfile +import importlib + +from google import auth as google_auth +from google.cloud import aiplatform +from google.cloud import storage +from google.cloud.aiplatform import initializer + +# TODO(vinnys): Replace with env var `BUILD_SPECIFIC_GCP_PROJECT` once supported +_, _TEST_PROJECT = google_auth.default() +_TEST_LOCATION = "us-central1" + +_XGBOOST_MODEL_URI = "gs://ucaip-test-us-central1/models/iris_xgboost/model.bst" + + +class TestModel: + def setup_method(self): + importlib.reload(initializer) + importlib.reload(aiplatform) + + def test_upload_and_deploy_xgboost_model(self): + """Upload XGBoost model from local file and deploy it for prediction.""" + + aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) + + storage_client = storage.Client(project=_TEST_PROJECT) + model_blob = storage.Blob.from_string( + uri=_XGBOOST_MODEL_URI, client=storage_client + ) + model_path = tempfile.mktemp() + ".my_model.xgb" + model_blob.download_to_filename(filename=model_path) + + model = aiplatform.Model.upload_xgboost_model_file(model_file_path=model_path,) + + # Currently we need to explicitly specify machine type. + # See https://github.com/googleapis/python-aiplatform/issues/773 + endpoint = model.deploy(machine_type="n1-standard-2") + predict_response = endpoint.predict(instances=[[0, 0, 0]]) + assert len(predict_response.predictions) == 1 + + endpoint.delete(force=True) + model.delete() From db65f67c18cd5a6e7f21e9ed5ddbb581c73e7847 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 01:49:47 -0800 Subject: [PATCH 26/36] Removed the container image URI generation code as asked by the reviewer The `aiplatform.helpers.get_prebuilt_prediction_container_uri` does not support future framework versions yet. See https://github.com/googleapis/python-aiplatform/pull/779#discussion_r759395754 --- google/cloud/aiplatform/models.py | 103 +++++------------------------- 1 file changed, 15 insertions(+), 88 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index a0373b9d4b..e79eb7bc06 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2573,32 +2573,17 @@ def upload_xgboost_model_file( is specified. Also if model directory does not contain a supported model file. """ - # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#xgboost - XGBOOST_SUPPORTED_VERSIONS = ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"] - XGBOOST_CONTAINER_IMAGE_URI_TEMPLATE = ( - "{registry}/vertex-ai/prediction/xgboost-{cpu_or_gpu}.{version}:latest" - ) - XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS = [ ".pkl", ".joblib", ".bst", ] - _LOGGER.info(f"Using the {xgboost_version} version of XGBoost.") - - if xgboost_version not in XGBOOST_SUPPORTED_VERSIONS: - _LOGGER.warning( - f"XGBoost version {xgboost_version} is not supported. 
" - f"Supported versions: {XGBOOST_SUPPORTED_VERSIONS}" - ) - - container_image_uri = XGBOOST_CONTAINER_IMAGE_URI_TEMPLATE.format( - registry=_get_container_registry( - location or aiplatform.initializer.global_config.location - ), - cpu_or_gpu="cpu", - version=xgboost_version.replace(".", "-"), + container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri( + region=location, + framework="xgboost", + framework_version=xgboost_version, + accelerator="cpu", ) model_file_path_obj = pathlib.Path(model_file_path) @@ -2781,29 +2766,16 @@ def upload_scikit_learn_model_file( is specified. Also if model directory does not contain a supported model file. """ - # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#scikit-learn - SKLEARN_SUPPORTED_VERSIONS = ["0.20", "0.22", "0.23", "0.24", "1.0"] - SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE = ( - "{registry}/vertex-ai/prediction/sklearn-{cpu_or_gpu}.{version}:latest" - ) SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS = [ ".pkl", ".joblib", ] - _LOGGER.info(f"Using the {sklearn_version} version of Scikit-learn.") - - if sklearn_version not in SKLEARN_SUPPORTED_VERSIONS: - _LOGGER.warning( - f"Scikit-learn version {sklearn_version} is not supported. " - f"Supported versions: {SKLEARN_SUPPORTED_VERSIONS}" - ) - container_image_uri = SKLEARN_CONTAINER_IMAGE_URI_TEMPLATE.format( - registry=_get_container_registry( - location or aiplatform.initializer.global_config.location - ), - cpu_or_gpu="cpu", - version=sklearn_version.replace(".", "-"), + container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri( + region=location, + framework="sklearn", + framework_version=sklearn_version, + accelerator="cpu", ) model_file_path_obj = pathlib.Path(model_file_path) @@ -2989,36 +2961,11 @@ def upload_tensorflow_saved_model( is specified. Also if model directory does not contain a supported model file. """ - # https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers#tensorflow - TENSORFLOW_SUPPORTED_VERSIONS = [ - "0.15", - "2.1", - "2.2", - "2.3", - "2.4", - "2.5", - "2.6", - "2.7", - ] - TENSORFLOW_CONTAINER_IMAGE_URI_TEMPLATE = ( - "{registry}/vertex-ai/prediction/tf{tf2_or_1}-{cpu_or_gpu}.{version}:latest" - ) - - _LOGGER.info(f"Using the {tensorflow_version} version of Tensorflow.") - - if tensorflow_version not in TENSORFLOW_SUPPORTED_VERSIONS: - _LOGGER.warning( - f"Tensorflow version {tensorflow_version} is not supported. " - f"Supported versions: {TENSORFLOW_SUPPORTED_VERSIONS}" - ) - - container_image_uri = TENSORFLOW_CONTAINER_IMAGE_URI_TEMPLATE.format( - registry=_get_container_registry( - location or aiplatform.initializer.global_config.location - ), - tf2_or_1=("2" if tensorflow_version.startswith("2.") else ""), - cpu_or_gpu="gpu" if use_gpu else "cpu", - version=tensorflow_version.replace(".", "-"), + container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri( + region=location, + framework="tensorflow", + framework_version=tensorflow_version, + accelerator="gpu" if use_gpu else "cpu", ) return aiplatform.Model.upload( @@ -3039,23 +2986,3 @@ def upload_tensorflow_saved_model( staging_bucket=staging_bucket, sync=sync, ) - - -def _get_container_registry(location: Optional[str] = None,) -> str: - """Gets container registry domain based on location. - - Args: - location: Google Cloud location - - Returns: - Google Container Registry domain. 
- """ - location = location or "us-" - if location.startswith("us-"): - return "us-docker.pkg.dev" - elif location.startswith("europe-"): - return "europe-docker.pkg.dev" - elif location.startswith("asia-"): - return "asia-docker.pkg.dev" - else: - raise ValueError(f"Unrecognized location: {location}") From 4d2e440a160cb20b21ddad59b5470be99ad4778d Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 01:53:56 -0800 Subject: [PATCH 27/36] Added the return type annotations --- google/cloud/aiplatform/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index e79eb7bc06..45324a9e5b 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2457,7 +2457,7 @@ def upload_xgboost_model_file( encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, sync=True, - ): + ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model resource. @@ -2649,7 +2649,7 @@ def upload_scikit_learn_model_file( encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, sync=True, - ): + ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model resource. @@ -2842,7 +2842,7 @@ def upload_tensorflow_saved_model( encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, sync=True, - ): + ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model resource. From 7b71234887f32d11978af96d4d8bc05df2210759 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 01:55:51 -0800 Subject: [PATCH 28/36] Removed the Optional type annotation from parameters with default value --- google/cloud/aiplatform/models.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 45324a9e5b..66002917c1 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2442,8 +2442,8 @@ def export_model( @staticmethod def upload_xgboost_model_file( model_file_path: str, - xgboost_version: Optional[str] = "1.4", - display_name: Optional[str] = "XGBoost model", + xgboost_version: str = "1.4", + display_name: str = "XGBoost model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, @@ -2634,8 +2634,8 @@ def upload_xgboost_model_file( @staticmethod def upload_scikit_learn_model_file( model_file_path: str, - sklearn_version: Optional[str] = "1.0", - display_name: Optional[str] = "Scikit-learn model", + sklearn_version: str = "1.0", + display_name: str = "Scikit-learn model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, @@ -2826,9 +2826,9 @@ def upload_scikit_learn_model_file( @staticmethod def upload_tensorflow_saved_model( saved_model_dir: str, - tensorflow_version: Optional[str] = "2.7", + tensorflow_version: str = "2.7", use_gpu: bool = False, - display_name: Optional[str] = "Tensorflow model", + display_name: str = "Tensorflow model", description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, From ab8729933f8d4050bb0c9eb7ea08bc8d73a4269e Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 01:59:01 -0800 Subject: [PATCH 29/36] Added "vertex" to the default staging bucket name suffix --- 
google/cloud/aiplatform/utils/gcs_utils.py | 2 +- tests/unit/aiplatform/test_models.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/aiplatform/utils/gcs_utils.py b/google/cloud/aiplatform/utils/gcs_utils.py index b6308192f2..23bdd71fff 100644 --- a/google/cloud/aiplatform/utils/gcs_utils.py +++ b/google/cloud/aiplatform/utils/gcs_utils.py @@ -132,7 +132,7 @@ def stage_local_data_in_gcs( # This prevents errors when some service required regional bucket. # E.g. "FailedPrecondition: 400 The Cloud Storage bucket of `gs://...` is in location `us`. It must be in the same regional location as the service location `us-central1`." # We are making the bucket name region-specific since the bucket is regional. - staging_bucket_name = project + "-staging-" + location + staging_bucket_name = project + "-vertex-staging-" + location client = storage.Client(project=project, credentials=credentials) staging_bucket = storage.Bucket(client=client, name=staging_bucket_name) if not staging_bucket.exists(): diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 0e5c47edc4..d7cd694223 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -1495,7 +1495,7 @@ def test_upload_xgboost_model_file_uploads_and_gets_model( # Verifying the staging bucket name generation assert upload_model_model.artifact_uri.startswith( - f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + f"gs://{_TEST_PROJECT}-vertex-staging-{_TEST_LOCATION}" ) assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri @@ -1547,7 +1547,7 @@ def test_upload_stages_data_uploads_and_gets_model( # Verifying the staging bucket name generation assert upload_model_model.artifact_uri.startswith( - f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + f"gs://{_TEST_PROJECT}-vertex-staging-{_TEST_LOCATION}" ) assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri @@ -1604,7 +1604,7 @@ def test_upload_scikit_learn_model_file_uploads_and_gets_model( # Verifying the staging bucket name generation assert upload_model_model.artifact_uri.startswith( - f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + f"gs://{_TEST_PROJECT}-vertex-staging-{_TEST_LOCATION}" ) assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri @@ -1653,7 +1653,7 @@ def test_upload_tensorflow_saved_model_uploads_and_gets_model( # Verifying the staging bucket name generation assert upload_model_model.artifact_uri.startswith( - f"gs://{_TEST_PROJECT}-staging-{_TEST_LOCATION}" + f"gs://{_TEST_PROJECT}-vertex-staging-{_TEST_LOCATION}" ) assert "/vertex_ai_auto_staging/" in upload_model_model.artifact_uri From 6ced3ba5db7f4b78d0069f0983bbd8bf8c70f2b5 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 02:10:05 -0800 Subject: [PATCH 30/36] Falling back to the globally-set credentials when not specified in stage_local_data_in_gcs --- google/cloud/aiplatform/utils/gcs_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/aiplatform/utils/gcs_utils.py b/google/cloud/aiplatform/utils/gcs_utils.py index 23bdd71fff..b7fc2d9291 100644 --- a/google/cloud/aiplatform/utils/gcs_utils.py +++ b/google/cloud/aiplatform/utils/gcs_utils.py @@ -126,6 +126,7 @@ def stage_local_data_in_gcs( if not staging_gcs_dir: project = project or initializer.global_config.project location = location or initializer.global_config.location + credentials = credentials or initializer.global_config.credentials # Creating the bucket if it does not exist. 
# Currently we only do this when staging_gcs_dir is not specified. # The buckets that we create are regional. From 7588d4bc717b8033ec40e42e3e8c77d4efde44bc Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 02:16:27 -0800 Subject: [PATCH 31/36] Changed the static methods to class methods as requested --- google/cloud/aiplatform/models.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 66002917c1..10a67ee8d7 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2439,8 +2439,9 @@ def export_model( return json_format.MessageToDict(operation_future.metadata.output_info._pb) - @staticmethod + @classmethod def upload_xgboost_model_file( + cls, model_file_path: str, xgboost_version: str = "1.4", display_name: str = "XGBoost model", @@ -2612,7 +2613,7 @@ def upload_xgboost_model_file( ) shutil.copy(model_file_path_obj, prepared_model_file_path) - return aiplatform.Model.upload( + return cls.upload( serving_container_image_uri=container_image_uri, artifact_uri=prepared_model_dir, display_name=display_name, @@ -2631,8 +2632,9 @@ def upload_xgboost_model_file( sync=sync, ) - @staticmethod + @classmethod def upload_scikit_learn_model_file( + cls, model_file_path: str, sklearn_version: str = "1.0", display_name: str = "Scikit-learn model", @@ -2804,7 +2806,7 @@ def upload_scikit_learn_model_file( ) shutil.copy(model_file_path_obj, prepared_model_file_path) - return aiplatform.Model.upload( + return cls.upload( serving_container_image_uri=container_image_uri, artifact_uri=prepared_model_dir, display_name=display_name, @@ -2823,8 +2825,9 @@ def upload_scikit_learn_model_file( sync=sync, ) - @staticmethod + @classmethod def upload_tensorflow_saved_model( + cls, saved_model_dir: str, tensorflow_version: str = "2.7", use_gpu: bool = False, @@ -2968,7 +2971,7 @@ def upload_tensorflow_saved_model( accelerator="gpu" if use_gpu else "cpu", ) - return aiplatform.Model.upload( + return cls.upload( serving_container_image_uri=container_image_uri, artifact_uri=saved_model_dir, display_name=display_name, From 39abcc31937ea21dde8fbd69dde8ce79bd1af4b4 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 02:37:12 -0800 Subject: [PATCH 32/36] Auto-deleting the temporary files --- google/cloud/aiplatform/models.py | 100 ++++++++++++++++-------------- 1 file changed, 52 insertions(+), 48 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 10a67ee8d7..fe7af338f2 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2440,6 +2440,7 @@ def export_model( return json_format.MessageToDict(operation_future.metadata.output_info._pb) @classmethod + @base.optional_sync() def upload_xgboost_model_file( cls, model_file_path: str, @@ -2606,33 +2607,35 @@ def upload_xgboost_model_file( # Preparing model directory # We cannot clean up the directory immediately after calling Model.upload since # that call may be asynchronous and return before the model file has been read. - # The temporary data will be automatically cleaned up by the system later. 
- prepared_model_dir = tempfile.mkdtemp() - prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( - "model" + model_file_extension - ) - shutil.copy(model_file_path_obj, prepared_model_file_path) + # To work around this, we make this method asynchronous (decorate with @base.optional_sync) + # but call Model.upload with sync=True. + with tempfile.TemporaryDirectory() as prepared_model_dir: + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) - return cls.upload( - serving_container_image_uri=container_image_uri, - artifact_uri=prepared_model_dir, - display_name=display_name, - description=description, - instance_schema_uri=instance_schema_uri, - parameters_schema_uri=parameters_schema_uri, - prediction_schema_uri=prediction_schema_uri, - explanation_metadata=explanation_metadata, - explanation_parameters=explanation_parameters, - project=project, - location=location, - credentials=credentials, - labels=labels, - encryption_spec_key_name=encryption_spec_key_name, - staging_bucket=staging_bucket, - sync=sync, - ) + return cls.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=True, + ) @classmethod + @base.optional_sync() def upload_scikit_learn_model_file( cls, model_file_path: str, @@ -2799,31 +2802,32 @@ def upload_scikit_learn_model_file( # Preparing model directory # We cannot clean up the directory immediately after calling Model.upload since # that call may be asynchronous and return before the model file has been read. - # The temporary data will be automatically cleaned up by the system later. - prepared_model_dir = tempfile.mkdtemp() - prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( - "model" + model_file_extension - ) - shutil.copy(model_file_path_obj, prepared_model_file_path) + # To work around this, we make this method asynchronous (decorate with @base.optional_sync) + # but call Model.upload with sync=True. 
+ with tempfile.TemporaryDirectory() as prepared_model_dir: + prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( + "model" + model_file_extension + ) + shutil.copy(model_file_path_obj, prepared_model_file_path) - return cls.upload( - serving_container_image_uri=container_image_uri, - artifact_uri=prepared_model_dir, - display_name=display_name, - description=description, - instance_schema_uri=instance_schema_uri, - parameters_schema_uri=parameters_schema_uri, - prediction_schema_uri=prediction_schema_uri, - explanation_metadata=explanation_metadata, - explanation_parameters=explanation_parameters, - project=project, - location=location, - credentials=credentials, - labels=labels, - encryption_spec_key_name=encryption_spec_key_name, - staging_bucket=staging_bucket, - sync=sync, - ) + return cls.upload( + serving_container_image_uri=container_image_uri, + artifact_uri=prepared_model_dir, + display_name=display_name, + description=description, + instance_schema_uri=instance_schema_uri, + parameters_schema_uri=parameters_schema_uri, + prediction_schema_uri=prediction_schema_uri, + explanation_metadata=explanation_metadata, + explanation_parameters=explanation_parameters, + project=project, + location=location, + credentials=credentials, + labels=labels, + encryption_spec_key_name=encryption_spec_key_name, + staging_bucket=staging_bucket, + sync=True, + ) @classmethod def upload_tensorflow_saved_model( From af8317b6fbeadb370d00964561655dfe1261299d Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 02:39:19 -0800 Subject: [PATCH 33/36] Reworded some error messages --- google/cloud/aiplatform/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index fe7af338f2..87af6b16bf 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -1725,7 +1725,7 @@ def upload( if re.match(PREBUILT_IMAGE_RE, serving_container_image_uri): if not model_dir.is_dir(): raise ValueError( - f"artifact_uri path must be a directory: '{artifact_uri}'" + f"artifact_uri path must be a directory: '{artifact_uri}' when using prebuilt image '{serving_container_image_uri}'" ) if not any( (model_dir / file_name).exists() @@ -1733,7 +1733,7 @@ def upload( ): raise ValueError( "artifact_uri directory does not contain any supported model files. 
" - f"The upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'" + f"When using a prebuilt serving image, the upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'" ) # Uploading the model From 624048ab5df6f4a19cafb56d57a8451c50b4dd34 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 02:58:05 -0800 Subject: [PATCH 34/36] Resolved the integration test feedback --- tests/system/aiplatform/test_model_upload.py | 26 +++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/system/aiplatform/test_model_upload.py b/tests/system/aiplatform/test_model_upload.py index cad6572879..acaeb2bad9 100644 --- a/tests/system/aiplatform/test_model_upload.py +++ b/tests/system/aiplatform/test_model_upload.py @@ -18,11 +18,15 @@ import tempfile import importlib +import pytest + from google import auth as google_auth from google.cloud import aiplatform from google.cloud import storage from google.cloud.aiplatform import initializer +from tests.system.aiplatform import e2e_base + # TODO(vinnys): Replace with env var `BUILD_SPECIFIC_GCP_PROJECT` once supported _, _TEST_PROJECT = google_auth.default() _TEST_LOCATION = "us-central1" @@ -30,12 +34,9 @@ _XGBOOST_MODEL_URI = "gs://ucaip-test-us-central1/models/iris_xgboost/model.bst" -class TestModel: - def setup_method(self): - importlib.reload(initializer) - importlib.reload(aiplatform) - - def test_upload_and_deploy_xgboost_model(self): +class TestModel(e2e_base.TestEndToEnd): + @pytest.mark.usefixtures("teardown") + def test_upload_and_deploy_xgboost_model(self, shared_state): """Upload XGBoost model from local file and deploy it for prediction.""" aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION) @@ -48,12 +49,19 @@ def test_upload_and_deploy_xgboost_model(self): model_blob.download_to_filename(filename=model_path) model = aiplatform.Model.upload_xgboost_model_file(model_file_path=model_path,) + shared_state["resources"] = [model] + + staging_bucket = storage.Blob.from_string( + uri=model.uri, client=storage_client + ).bucket + # Checking that the bucket is auto-generated + assert "-vertex-staging-" in staging_bucket.name + + shared_state["bucket"] = staging_bucket # Currently we need to explicitly specify machine type. 
# See https://github.com/googleapis/python-aiplatform/issues/773 endpoint = model.deploy(machine_type="n1-standard-2") + shared_state["resources"].append(endpoint) predict_response = endpoint.predict(instances=[[0, 0, 0]]) assert len(predict_response.predictions) == 1 - - endpoint.delete(force=True) - model.delete() From 1ab4c9aba1f74e8719fca2eb92244999b3778449 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 11:50:15 -0800 Subject: [PATCH 35/36] Removed unused imports --- tests/system/aiplatform/test_model_upload.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/system/aiplatform/test_model_upload.py b/tests/system/aiplatform/test_model_upload.py index acaeb2bad9..2be4653ff5 100644 --- a/tests/system/aiplatform/test_model_upload.py +++ b/tests/system/aiplatform/test_model_upload.py @@ -16,14 +16,12 @@ # import tempfile -import importlib import pytest from google import auth as google_auth from google.cloud import aiplatform from google.cloud import storage -from google.cloud.aiplatform import initializer from tests.system.aiplatform import e2e_base From fb020721bf95081318a0119493b85056345328f0 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 1 Dec 2021 11:57:24 -0800 Subject: [PATCH 36/36] Fixed the integration test teardown --- tests/system/aiplatform/test_model_upload.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/system/aiplatform/test_model_upload.py b/tests/system/aiplatform/test_model_upload.py index 2be4653ff5..90816b3cb6 100644 --- a/tests/system/aiplatform/test_model_upload.py +++ b/tests/system/aiplatform/test_model_upload.py @@ -32,8 +32,10 @@ _XGBOOST_MODEL_URI = "gs://ucaip-test-us-central1/models/iris_xgboost/model.bst" +@pytest.mark.usefixtures("delete_staging_bucket", "teardown") class TestModel(e2e_base.TestEndToEnd): - @pytest.mark.usefixtures("teardown") + _temp_prefix = f"{_TEST_PROJECT}-vertex-staging-{_TEST_LOCATION}" + def test_upload_and_deploy_xgboost_model(self, shared_state): """Upload XGBoost model from local file and deploy it for prediction."""
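
For context, a minimal end-to-end usage sketch of the feature this series adds (project, region, file paths, and bucket name below are illustrative placeholders; the regional "<project>-vertex-staging-<location>" bucket is created automatically when no staging bucket is configured):

    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")

    # Upload a locally saved XGBoost model file. The SDK copies it to a GCS
    # staging location and picks a prebuilt prediction container for the
    # requested framework version.
    model = aiplatform.Model.upload_xgboost_model_file(
        model_file_path="/tmp/my_model.bst",
        xgboost_version="1.4",
        display_name="my-xgboost-model",
    )

    # Model.upload itself also accepts a local artifact_uri and stages it;
    # staging_bucket overrides the bucket set via aiplatform.init.
    model_2 = aiplatform.Model.upload(
        serving_container_image_uri=(
            "us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-4:latest"
        ),
        artifact_uri="/tmp/my_model_dir",  # local directory containing model.bst
        staging_bucket="gs://my-staging-bucket",
        display_name="my-xgboost-model-2",
    )

    # Machine type currently has to be specified explicitly when deploying
    # (see https://github.com/googleapis/python-aiplatform/issues/773).
    endpoint = model.deploy(machine_type="n1-standard-2")
    print(endpoint.predict(instances=[[0, 0, 0]]))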