Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Add LIT methods for Pandas DataFrame and TensorFlow saved model. #874

Merged
merged 41 commits into from Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
5fae793
Created create_lit_dataset for pandas Dataframe
taiseiak Nov 23, 2021
9e30a2f
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Nov 23, 2021
26d829e
Adding create_lit_model function for saved model.
taiseiak Nov 23, 2021
82042fe
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Nov 23, 2021
53a8582
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Nov 25, 2021
814a418
Fix tests and add lit model creation for 1in1out
taiseiak Nov 25, 2021
9b5a82e
Fix formatting and lint errors
taiseiak Nov 25, 2021
a6933c7
Add open_lit function.
taiseiak Nov 25, 2021
f4163d2
Add the set up and open lit method
taiseiak Nov 29, 2021
194b08a
Fix merge conflict
taiseiak Nov 29, 2021
895056d
Merge branch 'main' of https://github.com/googleapis/python-aiplatfor…
taiseiak Nov 29, 2021
6355175
Change names from dataset to created_dataset
taiseiak Nov 29, 2021
c8eaeb9
Change order of model and dataset for setup
taiseiak Nov 30, 2021
2a0d948
feat: fix long line in doc string
taiseiak Dec 1, 2021
5fbd99d
style: Remove comments of old code in test
taiseiak Dec 1, 2021
49fb2d8
Merge branch 'main' into explain-with-lit
taiseiak Dec 1, 2021
1f79c83
Merge branch 'main' into explain-with-lit
taiseiak Dec 2, 2021
8ef6a43
fix: Change OrderedDict typing to use strings to fix the python 3.6 e…
taiseiak Dec 2, 2021
97d1d20
Merge branch 'main' into explain-with-lit
taiseiak Dec 3, 2021
3bd0ead
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 6, 2021
39fba85
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Dec 7, 2021
c389f0a
refactor: Changing the classes to be defined outside of the functions.
taiseiak Dec 7, 2021
9f3d3f0
Update google/cloud/aiplatform/explain/lit.py
taiseiak Dec 7, 2021
2070cc8
fix: Fix Optional typing from PR review.
taiseiak Dec 7, 2021
3d6886a
Merge branch 'explain-with-lit' of https://github.com/taiseiak/python…
taiseiak Dec 7, 2021
520632f
fix: fixing lint errors and adding back OrderedDict into the typing.
taiseiak Dec 8, 2021
12862a8
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Dec 8, 2021
fff0eac
Merge branch 'explain-with-lit' of https://github.com/taiseiak/python…
taiseiak Dec 8, 2021
935a538
test: Add test for open_lit function
taiseiak Dec 8, 2021
824a1ca
fix: Removing imports for lit methods in explain.
taiseiak Dec 8, 2021
31d2d10
test: Add tests for open_lit and set_up_and_open_lit, and refactor th…
taiseiak Dec 8, 2021
e7c9cfb
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Dec 8, 2021
054d990
fix: Go back to OrderedDict forward reference and clean up imports in…
taiseiak Dec 9, 2021
f8925d6
fix: Change Dataframe to DataFrame
taiseiak Dec 9, 2021
9b695dc
docs: Add docstrings to the Vertex LIT class methods.
taiseiak Dec 14, 2021
d2874ca
Merge branch 'main' into explain-with-lit
taiseiak Dec 14, 2021
1e9e6e3
fix: Fix lint error
taiseiak Dec 14, 2021
484cc84
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 14, 2021
0040429
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 16, 2021
6ebdfc1
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 16, 2021
cfc02da
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 16, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions google/cloud/aiplatform/explain/__init__.py
Expand Up @@ -20,6 +20,13 @@
explanation_metadata as explanation_metadata_compat,
)

from google.cloud.aiplatform.explain.lit import (
create_lit_dataset,
create_lit_model,
open_lit,
set_up_and_open_lit,
)

ExplanationMetadata = explanation_metadata_compat.ExplanationMetadata

# ExplanationMetadata subclasses
Expand All @@ -43,15 +50,19 @@


__all__ = (
"create_lit_dataset",
"create_lit_model",
"Encoding",
"ExplanationMetadata",
"ExplanationParameters",
"FeatureNoiseSigma",
"FeatureValueDomain",
"InputMetadata",
"IntegratedGradientsAttribution",
"open_lit",
"OutputMetadata",
"SampledShapleyAttribution",
"set_up_and_open_lit",
"SmoothGradConfig",
"Visualization",
"XraiAttribution",
Expand Down
220 changes: 220 additions & 0 deletions google/cloud/aiplatform/explain/lit.py
@@ -0,0 +1,220 @@
# -*- coding: utf-8 -*-

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

from typing import Dict, List, Optional, Tuple, Union


def create_lit_dataset(
    dataset: "pd.DataFrame",  # noqa: F821
    column_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
) -> "lit_dataset.Dataset":  # noqa: F821
    """Creates a LIT Dataset object.

    Args:
        dataset:
            Required. A Pandas DataFrame that includes feature column names and data.
        column_types:
            Required. An OrderedDict of string names matching the columns of the dataset
            as the key, and the associated LitType of the column.

    Returns:
        A LIT Dataset object that has the data from the dataset provided.

    Raises:
        ImportError: If LIT or Pandas is not installed.
    """
    # Import pandas directly: checking ``sys.modules`` only detects pandas when
    # the caller happened to import it already, not whether it is installed.
    try:
        import pandas as pd  # noqa: F401
    except ImportError:
        raise ImportError(
            "Pandas is not installed and is required to read the dataset. "
            'Please install Pandas using "pip install python-aiplatform[lit]"'
        )
    try:
        from lit_nlp.api import dataset as lit_dataset
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to get Dataset as the return format. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    class VertexLitDataset(lit_dataset.Dataset):
        """LIT dataset backed by the rows of the provided Pandas DataFrame."""

        def __init__(self):
            # One dict per DataFrame row, keyed by column name.
            self._examples = dataset.to_dict(orient="records")

        def spec(self):
            # LIT expects a plain dict mapping column name -> LitType.
            return dict(column_types)

    return VertexLitDataset()


def create_lit_model(
    model: str,
    input_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    output_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
) -> "lit_model.Model":  # noqa: F821
    """Creates a LIT Model object.

    Args:
        model:
            Required. A string reference to a TensorFlow saved model directory.
            The model must have exactly one input and one output tensor.
        input_types:
            Required. An OrderedDict of string names matching the features of the model
            as the key, and the associated LitType of the feature.
        output_types:
            Required. An OrderedDict of string names matching the labels of the model
            as the key, and the associated LitType of the label.

    Returns:
        A LIT Model object that has the same functionality as the model provided.

    Raises:
        ImportError: If LIT or TensorFlow is not installed.
        ValueError: If the model doesn't have exactly 1 input and output tensor.
    """
    try:
        import tensorflow as tf
    except ImportError:
        raise ImportError(
            "Tensorflow is not installed and is required to load saved model. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    try:
        from lit_nlp.api import model as lit_model
        from lit_nlp.api import types as lit_types
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to get Dataset as the return format. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    loaded_model = tf.saved_model.load(model)
    serving_default = loaded_model.signatures[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    ]
    _, kwargs_signature = serving_default.structured_input_signature
    output_signature = serving_default.structured_outputs

    # The LIT wrapper below flattens every example into a single batched
    # tensor, so exactly one input and one output tensor are supported.
    if len(kwargs_signature) != 1:
        raise ValueError("Please use a model with only one input tensor.")

    if len(output_signature) != 1:
        raise ValueError("Please use a model with only one output tensor.")

    class VertexLitModel(lit_model.Model):
        """LIT model wrapper around the loaded TensorFlow saved model."""

        def predict_minibatch(
            self, inputs: List["lit_types.JsonDict"]
        ) -> List["lit_types.JsonDict"]:
            # Assemble each example's features in the order declared by
            # input_types, then batch them into one input tensor.
            instances = [
                [example[feature] for feature in input_types] for example in inputs
            ]
            prediction_input_dict = {
                next(iter(kwargs_signature)): tf.convert_to_tensor(instances)
            }
            prediction_dict = serving_default(**prediction_input_dict)
            predictions = prediction_dict[next(iter(output_signature))].numpy()
            # Map each prediction row back onto the declared label names.
            return [
                dict(zip(output_types.keys(), prediction))
                for prediction in predictions
            ]

        def input_spec(self) -> "lit_types.Spec":
            return input_types

        def output_spec(self) -> "lit_types.Spec":
            return output_types

    return VertexLitModel()


def open_lit(
    models: Dict[str, "lit_model.Model"],  # noqa: F821
    datasets: Dict[str, "lit_dataset.Dataset"],  # noqa: F821
    open_in_new_tab: Optional[bool] = True,
):
    """Open LIT from the provided models and datasets.

    Args:
        models:
            Required. A dict mapping string names to the LIT models to open LIT with.
        datasets:
            Required. A dict mapping string names to the LIT datasets to open LIT with.
        open_in_new_tab:
            Optional. A boolean to choose if LIT open in a new tab or not.

    Raises:
        ImportError: If LIT is not installed.
    """
    try:
        from lit_nlp import notebook
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to open LIT. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    widget = notebook.LitWidget(models, datasets, open_in_new_tab=open_in_new_tab)
    widget.render()


def set_up_and_open_lit(
    dataset: Union["pd.DataFrame", "lit_dataset.Dataset"],  # noqa: F821
    column_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    model: Union[str, "lit_model.Model"],  # noqa: F821
    input_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    output_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    open_in_new_tab: Optional[bool] = True,
) -> Tuple["lit_dataset.Dataset", "lit_model.Model"]:  # noqa: F821
    """Creates a LIT dataset and model and opens LIT.

    Args:
        dataset:
            Required. A Pandas DataFrame that includes feature column names and data,
            or an already-created LIT Dataset (used as-is).
        column_types:
            Required. An OrderedDict of string names matching the columns of the dataset
            as the key, and the associated LitType of the column.
        model:
            Required. A string reference to a TensorFlow saved model directory,
            or an already-created LIT Model (used as-is).
            The model must have exactly one input and one output tensor.
        input_types:
            Required. An OrderedDict of string names matching the features of the model
            as the key, and the associated LitType of the feature.
        output_types:
            Required. An OrderedDict of string names matching the labels of the model
            as the key, and the associated LitType of the label.
        open_in_new_tab:
            Optional. A boolean to choose if LIT open in a new tab or not.

    Returns:
        A Tuple of the LIT dataset and model created.

    Raises:
        ImportError: If LIT or TensorFlow is not installed.
        ValueError: If the model doesn't have exactly 1 input and output tensor.
    """
    try:
        from lit_nlp.api import dataset as lit_dataset
        from lit_nlp.api import model as lit_model
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to get Dataset as the return format. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    # Only convert inputs that are not already LIT objects.
    if not isinstance(dataset, lit_dataset.Dataset):
        dataset = create_lit_dataset(dataset, column_types)

    if not isinstance(model, lit_model.Model):
        model = create_lit_model(model, input_types, output_types)

    open_lit({"model": model}, {"dataset": dataset}, open_in_new_tab=open_in_new_tab)

    return dataset, model
11 changes: 9 additions & 2 deletions setup.py
Expand Up @@ -36,14 +36,20 @@
tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
metadata_extra_require = ["pandas >= 1.0.0"]
xai_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
lit_extra_require = ["tensorflow >= 2.3.0", "pandas >= 1.0.0", "lit-nlp >= 0.4.0"]
profiler_extra_require = [
"tensorboard-plugin-profile >= 2.4.0",
"werkzeug >= 2.0.0",
"tensorflow >=2.4.0",
]

full_extra_require = list(
set(tensorboard_extra_require + metadata_extra_require + xai_extra_require)
set(
tensorboard_extra_require
+ metadata_extra_require
+ xai_extra_require
+ lit_extra_require
)
)
testing_extra_require = (
full_extra_require + profiler_extra_require + ["grpcio-testing", "pytest-xdist"]
Expand Down Expand Up @@ -88,7 +94,8 @@
"tensorboard": tensorboard_extra_require,
"testing": testing_extra_require,
"xai": xai_extra_require,
"cloud-profiler": profiler_extra_require,
"lit": lit_extra_require,
"cloud_profiler": profiler_extra_require,
},
python_requires=">=3.6",
scripts=[],
Expand Down
83 changes: 83 additions & 0 deletions tests/unit/aiplatform/test_explain_lit.py
@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-

# Copyright 2021 Google LLC
taiseiak marked this conversation as resolved.
Show resolved Hide resolved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import tensorflow as tf
import pandas as pd

from lit_nlp.api import types as lit_types
from google.cloud.aiplatform.explain.lit import (
create_lit_dataset,
create_lit_model,
)


class TestLit(tf.test.TestCase):
    """Unit tests for the LIT dataset and model factory helpers."""

    def _set_up_sequential(self):
        # Build a small two-feature regression model, save it to a temp
        # directory, and describe its feature/label LIT types.
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(32, activation="relu", input_shape=(2,)))
        model.add(tf.keras.layers.Dense(32, activation="relu"))
        model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
        self.seq_model = model
        self.saved_model_path = self.get_temp_dir()
        tf.saved_model.save(model, self.saved_model_path)
        feature_types = collections.OrderedDict(
            [("feature_1", lit_types.Scalar()), ("feature_2", lit_types.Scalar())]
        )
        label_types = collections.OrderedDict([("label", lit_types.RegressionScore())])
        return feature_types, label_types

    def _set_up_pandas_dataframe_and_columns(self):
        # Two-row frame with two scalar features and one regression label.
        data = {"feature_1": [1.0, 2.0], "feature_2": [3.0, 4.0], "label": [1.0, 0.0]}
        dataframe = pd.DataFrame.from_dict(data)
        columns = collections.OrderedDict(
            [
                ("feature_1", lit_types.Scalar()),
                ("feature_2", lit_types.Scalar()),
                ("label", lit_types.RegressionScore()),
            ]
        )
        return dataframe, columns

    def test_create_lit_dataset_from_pandas_returns_dataset(self):
        dataframe, columns = self._set_up_pandas_dataframe_and_columns()
        lit_dataset = create_lit_dataset(dataframe, columns)
        expected_examples = [
            {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0},
            {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0},
        ]

        # The dataset should expose the column spec and one example per row.
        assert lit_dataset.spec() == dict(columns)
        assert lit_dataset._examples == expected_examples

    def test_create_lit_model_from_tensorflow_returns_model(self):
        feature_types, label_types = self._set_up_sequential()
        lit_model = create_lit_model(self.saved_model_path, feature_types, label_types)
        test_inputs = [
            {"feature_1": 1.0, "feature_2": 2.0},
            {"feature_1": 3.0, "feature_2": 4.0},
        ]
        predictions = lit_model.predict_minibatch(test_inputs)

        # Specs must round-trip, and each input yields exactly one label.
        assert lit_model.input_spec() == dict(feature_types)
        assert lit_model.output_spec() == dict(label_types)
        assert len(predictions) == 2
        for prediction in predictions:
            assert prediction.keys() == {"label"}
            assert len(prediction.values()) == 1
taiseiak marked this conversation as resolved.
Show resolved Hide resolved