Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Add LIT methods for Pandas DataFrame and TensorFlow saved model. #874

Merged
merged 41 commits into from Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
5fae793
Created create_lit_dataset for pandas Dataframe
taiseiak Nov 23, 2021
9e30a2f
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Nov 23, 2021
26d829e
Adding create_lit_model function for saved model.
taiseiak Nov 23, 2021
82042fe
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Nov 23, 2021
53a8582
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Nov 25, 2021
814a418
Fix tests and add lit model creation for 1in1out
taiseiak Nov 25, 2021
9b5a82e
Fix formatting and lint errors
taiseiak Nov 25, 2021
a6933c7
Add open_lit function.
taiseiak Nov 25, 2021
f4163d2
Add the set up and open lit method
taiseiak Nov 29, 2021
194b08a
Fix merge conflict
taiseiak Nov 29, 2021
895056d
Merge branch 'main' of https://github.com/googleapis/python-aiplatfor…
taiseiak Nov 29, 2021
6355175
Change names from dataset to created_dataset
taiseiak Nov 29, 2021
c8eaeb9
Change order of model and dataset for setup
taiseiak Nov 30, 2021
2a0d948
feat: fix long line in doc string
taiseiak Dec 1, 2021
5fbd99d
style: Remove comments of old code in test
taiseiak Dec 1, 2021
49fb2d8
Merge branch 'main' into explain-with-lit
taiseiak Dec 1, 2021
1f79c83
Merge branch 'main' into explain-with-lit
taiseiak Dec 2, 2021
8ef6a43
fix: Change OrderedDict typing to use strings to fix the python 3.6 e…
taiseiak Dec 2, 2021
97d1d20
Merge branch 'main' into explain-with-lit
taiseiak Dec 3, 2021
3bd0ead
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 6, 2021
39fba85
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Dec 7, 2021
c389f0a
refactor: Changing the classes to be defined outside of the functions.
taiseiak Dec 7, 2021
9f3d3f0
Update google/cloud/aiplatform/explain/lit.py
taiseiak Dec 7, 2021
2070cc8
fix: Fix Optional typing from PR review.
taiseiak Dec 7, 2021
3d6886a
Merge branch 'explain-with-lit' of https://github.com/taiseiak/python…
taiseiak Dec 7, 2021
520632f
fix: fixing lint errors and adding back OrderedDict into the typing.
taiseiak Dec 8, 2021
12862a8
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Dec 8, 2021
fff0eac
Merge branch 'explain-with-lit' of https://github.com/taiseiak/python…
taiseiak Dec 8, 2021
935a538
test: Add test for open_lit function
taiseiak Dec 8, 2021
824a1ca
fix: Removing imports for lit methods in explain.
taiseiak Dec 8, 2021
31d2d10
test: Add tests for open_lit and set_up_and_open_lit, and refactor th…
taiseiak Dec 8, 2021
e7c9cfb
Merge branch 'googleapis:main' into explain-with-lit
taiseiak Dec 8, 2021
054d990
fix: Go back to OrderedDict forward reference and clean up imports in…
taiseiak Dec 9, 2021
f8925d6
fix: Change Dataframe to DataFrame
taiseiak Dec 9, 2021
9b695dc
docs: Add docstrings to the Vertex LIT class methods.
taiseiak Dec 14, 2021
d2874ca
Merge branch 'main' into explain-with-lit
taiseiak Dec 14, 2021
1e9e6e3
fix: Fix lint error
taiseiak Dec 14, 2021
484cc84
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 14, 2021
0040429
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 16, 2021
6ebdfc1
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 16, 2021
cfc02da
Merge branch 'main' into explain-with-lit
sasha-gitg Dec 16, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions google/cloud/aiplatform/explain/__init__.py
Expand Up @@ -20,6 +20,13 @@
explanation_metadata as explanation_metadata_compat,
)

from google.cloud.aiplatform.explain.lit import (
create_lit_dataset,
create_lit_model,
open_lit,
set_up_and_open_lit,
)

ExplanationMetadata = explanation_metadata_compat.ExplanationMetadata

# ExplanationMetadata subclasses
Expand All @@ -43,15 +50,19 @@


__all__ = (
"create_lit_dataset",
"create_lit_model",
"Encoding",
"ExplanationMetadata",
"ExplanationParameters",
"FeatureNoiseSigma",
"FeatureValueDomain",
"InputMetadata",
"IntegratedGradientsAttribution",
"open_lit",
"OutputMetadata",
"SampledShapleyAttribution",
"set_up_and_open_lit",
"SmoothGradConfig",
"Visualization",
"XraiAttribution",
Expand Down
220 changes: 220 additions & 0 deletions google/cloud/aiplatform/explain/lit.py
@@ -0,0 +1,220 @@
# -*- coding: utf-8 -*-

# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

from typing import Dict, List, Optional, Tuple, Union


def create_lit_dataset(
    dataset: "pd.DataFrame",  # noqa: F821
    column_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
) -> "lit_dataset.Dataset":  # noqa: F821
    """Creates a LIT Dataset object.

    Args:
        dataset:
            Required. A Pandas DataFrame that includes feature column names and data.
        column_types:
            Required. An OrderedDict of string names matching the columns of the dataset
            as the key, and the associated LitType of the column.

    Returns:
        A LIT Dataset object that has the data from the dataset provided.

    Raises:
        ImportError: If LIT or Pandas is not installed.
    """
    # Import pandas directly: checking ``sys.modules`` only detects pandas when
    # the caller happened to import it already, not whether it is installed.
    try:
        import pandas as pd  # noqa: F401
    except ImportError:
        raise ImportError(
            "Pandas is not installed and is required to read the dataset. "
            'Please install Pandas using "pip install python-aiplatform[lit]"'
        )
    try:
        from lit_nlp.api import dataset as lit_dataset
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to get Dataset as the return format. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    class VertexLitDataset(lit_dataset.Dataset):
        """LIT dataset backed by the rows of the provided Pandas DataFrame."""

        def __init__(self):
            # One dict per DataFrame row, keyed by column name.
            self._examples = dataset.to_dict(orient="records")

        def spec(self):
            # LIT expects a plain dict mapping column name -> LitType.
            return dict(column_types)

    return VertexLitDataset()


def create_lit_model(
    model: str,
    input_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    output_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
) -> "lit_model.Model":  # noqa: F821
    """Creates a LIT Model object.

    Args:
        model:
            Required. A string reference to a TensorFlow saved model directory.
            The model must have exactly one input and one output tensor.
        input_types:
            Required. An OrderedDict of string names matching the features of the model
            as the key, and the associated LitType of the feature.
        output_types:
            Required. An OrderedDict of string names matching the labels of the model
            as the key, and the associated LitType of the label.

    Returns:
        A LIT Model object that has the same functionality as the model provided.

    Raises:
        ImportError: If LIT or TensorFlow is not installed.
        ValueError: If the model doesn't have exactly 1 input and output tensor.
    """
    try:
        import tensorflow as tf
    except ImportError:
        raise ImportError(
            "Tensorflow is not installed and is required to load saved model. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    try:
        from lit_nlp.api import model as lit_model
        from lit_nlp.api import types as lit_types
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to get Dataset as the return format. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    loaded_model = tf.saved_model.load(model)
    serving_default = loaded_model.signatures[
        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
    ]
    _, kwargs_signature = serving_default.structured_input_signature
    output_signature = serving_default.structured_outputs

    # The LIT wrapper below flattens every example into a single batched
    # tensor, so exactly one input and one output tensor are supported.
    if len(kwargs_signature) != 1:
        raise ValueError("Please use a model with only one input tensor.")

    if len(output_signature) != 1:
        raise ValueError("Please use a model with only one output tensor.")

    class VertexLitModel(lit_model.Model):
        """LIT model wrapper around the loaded TensorFlow saved model."""

        def predict_minibatch(
            self, inputs: List["lit_types.JsonDict"]
        ) -> List["lit_types.JsonDict"]:
            # Assemble each example's features in the order declared by
            # input_types, then batch them into one input tensor.
            instances = [
                [example[feature] for feature in input_types] for example in inputs
            ]
            prediction_input_dict = {
                next(iter(kwargs_signature)): tf.convert_to_tensor(instances)
            }
            prediction_dict = serving_default(**prediction_input_dict)
            predictions = prediction_dict[next(iter(output_signature))].numpy()
            # Map each prediction row back onto the declared label names.
            return [
                dict(zip(output_types.keys(), prediction))
                for prediction in predictions
            ]

        def input_spec(self) -> "lit_types.Spec":
            return input_types

        def output_spec(self) -> "lit_types.Spec":
            return output_types

    return VertexLitModel()


def open_lit(
    models: Dict[str, "lit_model.Model"],  # noqa: F821
    datasets: Dict[str, "lit_dataset.Dataset"],  # noqa: F821
    open_in_new_tab: Optional[bool] = True,
):
    """Open LIT from the provided models and datasets.

    Args:
        models:
            Required. A dict mapping string names to the LIT models to open LIT with.
        datasets:
            Required. A dict mapping string names to the LIT datasets to open LIT with.
        open_in_new_tab:
            Optional. A boolean to choose if LIT open in a new tab or not.

    Raises:
        ImportError: If LIT is not installed.
    """
    try:
        from lit_nlp import notebook
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to open LIT. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    widget = notebook.LitWidget(models, datasets, open_in_new_tab=open_in_new_tab)
    widget.render()


def set_up_and_open_lit(
    dataset: Union["pd.DataFrame", "lit_dataset.Dataset"],  # noqa: F821
    column_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    model: Union[str, "lit_model.Model"],  # noqa: F821
    input_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    output_types: "OrderedDict[str, lit_types.LitType]",  # noqa: F821
    open_in_new_tab: Optional[bool] = True,
) -> Tuple["lit_dataset.Dataset", "lit_model.Model"]:  # noqa: F821
    """Creates a LIT dataset and model and opens LIT.

    Args:
        dataset:
            Required. A Pandas DataFrame that includes feature column names and data,
            or an already-created LIT Dataset (used as-is).
        column_types:
            Required. An OrderedDict of string names matching the columns of the dataset
            as the key, and the associated LitType of the column.
        model:
            Required. A string reference to a TensorFlow saved model directory,
            or an already-created LIT Model (used as-is).
            The model must have exactly one input and one output tensor.
        input_types:
            Required. An OrderedDict of string names matching the features of the model
            as the key, and the associated LitType of the feature.
        output_types:
            Required. An OrderedDict of string names matching the labels of the model
            as the key, and the associated LitType of the label.
        open_in_new_tab:
            Optional. A boolean to choose if LIT open in a new tab or not.

    Returns:
        A Tuple of the LIT dataset and model created.

    Raises:
        ImportError: If LIT or TensorFlow is not installed.
        ValueError: If the model doesn't have exactly 1 input and output tensor.
    """
    try:
        from lit_nlp.api import dataset as lit_dataset
        from lit_nlp.api import model as lit_model
    except ImportError:
        raise ImportError(
            "LIT is not installed and is required to get Dataset as the return format. "
            'Please install the SDK using "pip install python-aiplatform[lit]"'
        )

    # Only convert inputs that are not already LIT objects.
    if not isinstance(dataset, lit_dataset.Dataset):
        dataset = create_lit_dataset(dataset, column_types)

    if not isinstance(model, lit_model.Model):
        model = create_lit_model(model, input_types, output_types)

    open_lit({"model": model}, {"dataset": dataset}, open_in_new_tab=open_in_new_tab)

    return dataset, model
11 changes: 9 additions & 2 deletions setup.py
Expand Up @@ -36,14 +36,20 @@
tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
metadata_extra_require = ["pandas >= 1.0.0"]
xai_extra_require = ["tensorflow >=2.3.0, <=2.5.0"]
lit_extra_require = ["tensorflow >= 2.3.0", "pandas >= 1.0.0", "lit-nlp >= 0.4.0"]
profiler_extra_require = [
"tensorboard-plugin-profile >= 2.4.0",
"werkzeug >= 2.0.0",
"tensorflow >=2.4.0",
]

full_extra_require = list(
set(tensorboard_extra_require + metadata_extra_require + xai_extra_require)
set(
tensorboard_extra_require
+ metadata_extra_require
+ xai_extra_require
+ lit_extra_require
)
)
testing_extra_require = (
full_extra_require + profiler_extra_require + ["grpcio-testing", "pytest-xdist"]
Expand Down Expand Up @@ -88,7 +94,8 @@
"tensorboard": tensorboard_extra_require,
"testing": testing_extra_require,
"xai": xai_extra_require,
"cloud-profiler": profiler_extra_require,
"lit": lit_extra_require,
"cloud_profiler": profiler_extra_require,
},
python_requires=">=3.6",
scripts=[],
Expand Down
83 changes: 83 additions & 0 deletions tests/unit/aiplatform/test_explain_lit.py
@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-

# Copyright 2021 Google LLC
taiseiak marked this conversation as resolved.
Show resolved Hide resolved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import tensorflow as tf
import pandas as pd

from lit_nlp.api import types as lit_types
from google.cloud.aiplatform.explain.lit import (
create_lit_dataset,
create_lit_model,
)


class TestLit(tf.test.TestCase):
    """Unit tests for the LIT dataset and model factory helpers."""

    def _set_up_sequential(self):
        # Build a small two-feature regression model, save it to a temp
        # directory, and describe its feature/label LIT types.
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(32, activation="relu", input_shape=(2,)))
        model.add(tf.keras.layers.Dense(32, activation="relu"))
        model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
        self.seq_model = model
        self.saved_model_path = self.get_temp_dir()
        tf.saved_model.save(model, self.saved_model_path)
        feature_types = collections.OrderedDict(
            [("feature_1", lit_types.Scalar()), ("feature_2", lit_types.Scalar())]
        )
        label_types = collections.OrderedDict([("label", lit_types.RegressionScore())])
        return feature_types, label_types

    def _set_up_pandas_dataframe_and_columns(self):
        # Two-row frame with two scalar features and one regression label.
        data = {"feature_1": [1.0, 2.0], "feature_2": [3.0, 4.0], "label": [1.0, 0.0]}
        dataframe = pd.DataFrame.from_dict(data)
        columns = collections.OrderedDict(
            [
                ("feature_1", lit_types.Scalar()),
                ("feature_2", lit_types.Scalar()),
                ("label", lit_types.RegressionScore()),
            ]
        )
        return dataframe, columns

    def test_create_lit_dataset_from_pandas_returns_dataset(self):
        dataframe, columns = self._set_up_pandas_dataframe_and_columns()
        lit_dataset = create_lit_dataset(dataframe, columns)
        expected_examples = [
            {"feature_1": 1.0, "feature_2": 3.0, "label": 1.0},
            {"feature_1": 2.0, "feature_2": 4.0, "label": 0.0},
        ]

        # The dataset should expose the column spec and one example per row.
        assert lit_dataset.spec() == dict(columns)
        assert lit_dataset._examples == expected_examples

    def test_create_lit_model_from_tensorflow_returns_model(self):
        feature_types, label_types = self._set_up_sequential()
        lit_model = create_lit_model(self.saved_model_path, feature_types, label_types)
        test_inputs = [
            {"feature_1": 1.0, "feature_2": 2.0},
            {"feature_1": 3.0, "feature_2": 4.0},
        ]
        predictions = lit_model.predict_minibatch(test_inputs)

        # Specs must round-trip, and each input yields exactly one label.
        assert lit_model.input_spec() == dict(feature_types)
        assert lit_model.output_spec() == dict(label_types)
        assert len(predictions) == 2
        for prediction in predictions:
            assert prediction.keys() == {"label"}
            assert len(prediction.values()) == 1
taiseiak marked this conversation as resolved.
Show resolved Hide resolved