From f6f9a97bb178d9859b8d43166a43792d88e57710 Mon Sep 17 00:00:00 2001 From: Yaqi Ji Date: Thu, 8 Jul 2021 12:27:06 -0700 Subject: [PATCH] feat: add explain get_metadata function for tf2. (#507) * feat: add cancel method to pipeline client * feat: add basic metadata structure for XAI explain * feat: add tf2 get_metadata function * feat: add tf2 get_metadata function * Add more tests for tf2_getmetadata * Address comments * Update to tensorflow instead of tensorflow-cpu * Move one time use setup function --- .../explain/metadata/metadata_builder.py | 4 - .../explain/metadata/tf/v2/__init__.py | 15 ++ .../tf/v2/saved_model_metadata_builder.py | 133 ++++++++++++++ setup.py | 12 +- ...plain_saved_model_metadata_builder_test.py | 167 ++++++++++++++++++ 5 files changed, 325 insertions(+), 6 deletions(-) create mode 100644 google/cloud/aiplatform/explain/metadata/tf/v2/__init__.py create mode 100644 google/cloud/aiplatform/explain/metadata/tf/v2/saved_model_metadata_builder.py create mode 100644 tests/unit/aiplatform/test_explain_saved_model_metadata_builder_test.py diff --git a/google/cloud/aiplatform/explain/metadata/metadata_builder.py b/google/cloud/aiplatform/explain/metadata/metadata_builder.py index c4bf94e65d..98e768ea24 100644 --- a/google/cloud/aiplatform/explain/metadata/metadata_builder.py +++ b/google/cloud/aiplatform/explain/metadata/metadata_builder.py @@ -25,10 +25,6 @@ class MetadataBuilder(_ABC): """Abstract base class for metadata builders.""" - @abc.abstractmethod - def save_model_with_metadata(self, filepath: str): - """Saves the model with metadata.""" - @abc.abstractmethod def get_metadata(self): """Returns the current metadata as a dictionary.""" diff --git a/google/cloud/aiplatform/explain/metadata/tf/v2/__init__.py b/google/cloud/aiplatform/explain/metadata/tf/v2/__init__.py new file mode 100644 index 0000000000..0e973c9a40 --- /dev/null +++ b/google/cloud/aiplatform/explain/metadata/tf/v2/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + 
+# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/google/cloud/aiplatform/explain/metadata/tf/v2/saved_model_metadata_builder.py b/google/cloud/aiplatform/explain/metadata/tf/v2/saved_model_metadata_builder.py new file mode 100644 index 0000000000..9541310d21 --- /dev/null +++ b/google/cloud/aiplatform/explain/metadata/tf/v2/saved_model_metadata_builder.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class SavedModelMetadataBuilder(metadata_builder.MetadataBuilder):
    """Generates explanation metadata for a TF 2.X SavedModel.

    The SavedModel is loaded once, at construction time. One of its
    signatures is inspected, and the builder records an ``InputMetadata``
    entry for every signature input plus a single ``OutputMetadata`` entry
    for the output that should be explained.
    """

    def __init__(
        self,
        model_path: str,
        signature_name: Optional[str] = None,
        outputs_to_explain: Optional[List[str]] = None,
        **kwargs
    ) -> None:
        """Initializes a SavedModelMetadataBuilder object.

        Args:
            model_path:
                Required. Path to load the saved model from.
            signature_name:
                Optional. Name of the signature to be explained. Inputs and
                outputs of this signature will be written in the metadata. If
                not provided (or empty), the default serving signature key,
                ``tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY``, is used.
            outputs_to_explain:
                Optional. List of output names to explain. Only single output is
                supported for now. Hence, the list should contain one element.
                When it is omitted (or empty), the first output of the
                signature is used.
            **kwargs:
                Extra keyword arguments. They are stored on the instance as
                ``_saved_model_args``; nothing in this class reads them.

        Raises:
            ValueError: If outputs_to_explain contains more than 1 element, or
                if the output to explain cannot be resolved from the signature.
            ImportError: If tensorflow cannot be imported.
        """
        # Validate the cheap argument first so an invalid call fails before
        # tensorflow is imported and the model is loaded.
        if outputs_to_explain and len(outputs_to_explain) > 1:
            raise ValueError(
                '"outputs_to_explain" can only contain 1 element.\n'
                "Got: %s" % len(outputs_to_explain)
            )
        self._explain_output = outputs_to_explain
        self._saved_model_args = kwargs

        # tensorflow is imported lazily so that importing this module does not
        # itself require tensorflow to be installed.
        try:
            import tensorflow as tf
        except ImportError as err:
            # Chain explicitly so the original import failure stays attached
            # as the cause of the friendlier error.
            raise ImportError(
                "Tensorflow is not installed and is required to load saved model. "
                'Please install the SDK using "pip install google-cloud-aiplatform[full]"'
            ) from err

        if not signature_name:
            signature_name = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        self._loaded_model = tf.saved_model.load(model_path)
        self._inputs, self._outputs = self._infer_metadata_entries_from_model(
            signature_name
        )

    def _infer_metadata_entries_from_model(
        self, signature_name: str
    ) -> Tuple[
        Dict[str, explanation_metadata.ExplanationMetadata.InputMetadata],
        Dict[str, explanation_metadata.ExplanationMetadata.OutputMetadata],
    ]:
        """Infers metadata inputs and outputs from one signature of the model.

        Args:
            signature_name:
                Required. Name of the signature to be explained. Inputs and
                outputs of this signature will be written in the metadata.
                The caller is responsible for substituting the default
                signature name; no default is applied here.

        Returns:
            A tuple ``(input_mds, output_mds)``. ``input_mds`` maps every
            signature input name to its ``InputMetadata``. ``output_mds`` maps
            exactly one output name to its ``OutputMetadata``.

        Raises:
            ValueError: If the requested output name is not found in the
                signature outputs, or if no output name was requested and the
                signature defines no outputs.
        """
        # NOTE(review): an unknown signature_name is not handled here; whatever
        # error the signatures lookup raises (presumably KeyError) propagates.
        loaded_sig = self._loaded_model.signatures[signature_name]
        # structured_input_signature unpacks into two parts; only the second,
        # which is iterated by input name below, is used.
        _, input_sig = loaded_sig.structured_input_signature
        output_sig = loaded_sig.structured_outputs

        # Inputs whose dtype is not floating are tagged "categorical"; floating
        # inputs leave the modality unset.
        input_mds = {
            name: explanation_metadata.ExplanationMetadata.InputMetadata(
                input_tensor_name=name,
                modality=None if tensor_spec.dtype.is_floating else "categorical",
            )
            for name, tensor_spec in input_sig.items()
        }

        output_names = list(output_sig)
        if self._explain_output:
            output_name = self._explain_output[0]
            if output_name not in output_names:
                raise ValueError(
                    "Specified output name cannot be found in given signature outputs."
                )
        elif output_names:
            # No explicit request: fall back to the first output of the signature.
            output_name = output_names[0]
        else:
            # Nothing was requested and nothing is available. Report that
            # directly rather than claiming a "specified" name was not found.
            raise ValueError("Given signature does not define any outputs to explain.")

        output_mds = {
            output_name: explanation_metadata.ExplanationMetadata.OutputMetadata(
                output_tensor_name=output_name,
            )
        }
        return input_mds, output_mds

    def get_metadata(self) -> Dict[str, Any]:
        """Returns the current metadata as a dictionary.

        Returns:
            The explanation metadata, built from the inputs and outputs
            inferred at construction time and converted to a dictionary with
            ``json_format.MessageToDict``.
        """
        current_md = explanation_metadata.ExplanationMetadata(
            inputs=self._inputs,
            outputs=self._outputs,
        )
        # MessageToDict is given the message's underlying `_pb` object rather
        # than the wrapper itself.
        return json_format.MessageToDict(current_md._pb)
class SavedModelMetadataBuilderTest(tf.test.TestCase):
    """Tests for SavedModelMetadataBuilder against several kinds of TF2 models.

    Each test saves a model into the test's temp dir, builds metadata from the
    SavedModel and compares the generated dictionary with the expected one.
    ``assertEqual``/``assertRaises`` are used instead of bare ``assert`` so
    the checks survive ``python -O`` and failures print a readable diff.
    """

    def _save_custom_module(self):
        """Saves a plain ``tf.Module`` with a named output; returns its directory.

        The module exposes a single ``serving_default`` signature taking ``x``
        and returning ``{"custom_output_name": ...}``.
        """

        class CustomModuleWithOutputName(tf.Module):
            def __init__(self):
                super().__init__()
                self.v = tf.Variable(1.0)

            @tf.function(input_signature=[tf.TensorSpec([], tf.float32)])
            def __call__(self, x):
                return {"custom_output_name": x * self.v}

        module_output = CustomModuleWithOutputName()
        call_output = module_output.__call__.get_concrete_function(
            tf.TensorSpec(None, tf.float32)
        )
        model_dir = self.get_temp_dir()
        tf.saved_model.save(
            module_output, model_dir, signatures={"serving_default": call_output}
        )
        return model_dir

    def test_get_metadata_sequential(self):
        # Set up for the sequential.
        seq_model = tf.keras.models.Sequential()
        seq_model.add(tf.keras.layers.Dense(32, activation="relu", input_dim=10))
        seq_model.add(tf.keras.layers.Dense(32, activation="relu"))
        seq_model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
        saved_model_path = self.get_temp_dir()
        tf.saved_model.save(seq_model, saved_model_path)

        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(
            saved_model_path
        )
        generated_md = builder.get_metadata()
        expected_md = {
            "outputs": {"dense_2": {"outputTensorName": "dense_2"}},
            "inputs": {"dense_input": {"inputTensorName": "dense_input"}},
        }
        self.assertEqual(expected_md, generated_md)

    def test_get_metadata_functional(self):
        inputs1 = tf.keras.Input(shape=(10,), name="model_input1")
        inputs2 = tf.keras.Input(shape=(10,), name="model_input2")
        x = tf.keras.layers.Dense(32, activation="relu")(inputs1)
        x = tf.keras.layers.Dense(32, activation="relu")(x)
        x = tf.keras.layers.concatenate([x, inputs2])
        outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)
        fun_model = tf.keras.Model(
            inputs=[inputs1, inputs2], outputs=outputs, name="fun"
        )
        model_dir = self.get_temp_dir()
        tf.saved_model.save(fun_model, model_dir)

        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
        generated_md = builder.get_metadata()
        expected_md = {
            "inputs": {
                "model_input1": {"inputTensorName": "model_input1"},
                "model_input2": {"inputTensorName": "model_input2"},
            },
            "outputs": {"dense_2": {"outputTensorName": "dense_2"}},
        }
        self.assertEqual(expected_md, generated_md)

    def test_get_metadata_subclassed_model(self):
        class MyModel(tf.keras.Model):
            def __init__(self, num_classes=2):
                super().__init__(name="my_model")
                self.num_classes = num_classes
                self.dense_1 = tf.keras.layers.Dense(32, activation="relu")
                self.dense_2 = tf.keras.layers.Dense(num_classes, activation="sigmoid")

            def call(self, inputs):
                x = self.dense_1(inputs)
                return self.dense_2(x)

        subclassed_model = MyModel()
        subclassed_model.compile(loss="categorical_crossentropy")
        # The model is fitted for one step on seeded random data before saving.
        np.random.seed(0)
        x_train = np.random.random((1, 100))
        y_train = np.random.randint(2, size=(1, 2))
        subclassed_model.fit(x_train, y_train, batch_size=1, epochs=1)
        model_dir = self.get_temp_dir()
        tf.saved_model.save(subclassed_model, model_dir)

        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
        generated_md = builder.get_metadata()
        expected_md = {
            "inputs": {"input_1": {"inputTensorName": "input_1"}},
            "outputs": {"output_1": {"outputTensorName": "output_1"}},
        }
        self.assertEqual(expected_md, generated_md)

    def test_non_keras_model(self):
        model_dir = self._save_custom_module()

        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
        generated_md = builder.get_metadata()
        expected_md = {
            "inputs": {"x": {"inputTensorName": "x"}},
            "outputs": {
                "custom_output_name": {"outputTensorName": "custom_output_name"}
            },
        }
        self.assertEqual(expected_md, generated_md)

    def test_explicit_signature_and_output_name(self):
        # Naming the signature and the output explicitly must give the same
        # metadata as relying on the defaults.
        model_dir = self._save_custom_module()

        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(
            model_dir,
            signature_name="serving_default",
            outputs_to_explain=["custom_output_name"],
        )
        expected_md = {
            "inputs": {"x": {"inputTensorName": "x"}},
            "outputs": {
                "custom_output_name": {"outputTensorName": "custom_output_name"}
            },
        }
        self.assertEqual(expected_md, builder.get_metadata())

    def test_multiple_outputs_to_explain_raises(self):
        # The length check runs before the model is loaded, so the path does
        # not need to contain a SavedModel.
        with self.assertRaises(ValueError):
            saved_model_metadata_builder.SavedModelMetadataBuilder(
                self.get_temp_dir(), outputs_to_explain=["output_a", "output_b"]
            )

    def test_unknown_output_name_raises(self):
        model_dir = self._save_custom_module()

        with self.assertRaises(ValueError):
            saved_model_metadata_builder.SavedModelMetadataBuilder(
                model_dir, outputs_to_explain=["not_an_output"]
            )

    def test_model_with_feature_column(self):
        feature_columns = [
            tf.feature_column.embedding_column(
                tf.feature_column.categorical_column_with_vocabulary_list(
                    "mode", ["fixed", "normal", "reversible"]
                ),
                dimension=8,
            ),
            tf.feature_column.numeric_column("age"),
        ]
        feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

        model = tf.keras.Sequential(
            [
                feature_layer,
                tf.keras.layers.Dense(128, activation="relu"),
                tf.keras.layers.Dense(1),
            ]
        )

        model.compile(
            optimizer="adam",
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
            metrics=["accuracy"],
        )

        model.fit(
            {"age": np.array([20, 1]), "mode": np.array(["fixed", "normal"])},
            np.array([0, 1]),
        )
        model_dir = self.get_temp_dir()
        tf.saved_model.save(model, model_dir)

        builder = saved_model_metadata_builder.SavedModelMetadataBuilder(model_dir)
        generated_md = builder.get_metadata()
        # NOTE(review): "age" is expected to be "categorical" as well,
        # presumably because it is fed as an integer array and so its
        # signature dtype is non-floating. Confirm this is intended.
        expected_md = {
            "inputs": {
                "age": {"inputTensorName": "age", "modality": "categorical"},
                "mode": {"inputTensorName": "mode", "modality": "categorical"},
            },
            "outputs": {"output_1": {"outputTensorName": "output_1"}},
        }
        self.assertEqual(expected_md, generated_md)