Skip to content

Commit

Permalink
Merge pull request #44 from VesnaT/ice
Browse files Browse the repository at this point in the history
ICE: Init ICE widget
  • Loading branch information
lanzagar committed Aug 3, 2022
2 parents 2a131df + 3547f2b commit 334edee
Show file tree
Hide file tree
Showing 10 changed files with 1,214 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/index.rst
Expand Up @@ -12,6 +12,7 @@ Widgets
widgets/explain-model
widgets/explain-prediction
widgets/explain-predictions
widgets/ice

Indices and tables
==================
Expand Down
34 changes: 34 additions & 0 deletions doc/widgets/ice.md
@@ -0,0 +1,34 @@
ICE
===

Displays one line per instance that shows how the instance’s prediction changes when a feature changes.

**Inputs**

- Model: model
- Data: dataset

The **ICE** (Individual Conditional Expectation) widget visualizes the dependence of the prediction on a feature for each instance separately, resulting in one line per instance, compared to one line overall in partial dependence plots.


![](images/ICE.png)

1. Select a target class.
2. Select a feature.
3. Order features by importance (partial dependence averaged across all the samples).
4. Apply the color of a discrete feature.
5. If **Centered** is ticked, the plot lines will start at the origin of the y-axis.
6. If **Show mean** is ticked, the average across all the samples in the dataset is shown.
7. If **Send Automatically** is ticked, the output is sent automatically after any change.
Alternatively, click **Send**.
8. Get help, save the plot, make the report, set plot properties, or observe the size of input and output data.
9. The plot shows a line for each instance in the input dataset.

Example
-------

In the following example, we use the ICE widget to explain a Random Forest model. In the File widget, we open the Housing dataset. We connect it to the Random Forest widget, which trains the model. The ICE widget accepts the model and the data, which are used to explain the model.

By selecting some arbitrary lines, the selected instances of the input dataset appear on the output of the ICE widget.

![](images/ICE-example.png)
Binary file added doc/widgets/images/ICE-example.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/widgets/images/ICE.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
52 changes: 48 additions & 4 deletions orangecontrib/explain/inspection.py
@@ -1,12 +1,13 @@
""" Permutation feature importance for models. """
from typing import Callable
from typing import Callable, Tuple, Optional, Dict

import numpy as np
import scipy.sparse as sp
from sklearn.inspection import partial_dependence

from Orange.base import Model
from Orange.base import Model, SklModel
from Orange.classification import Model as ClsModel
from Orange.data import Table
from Orange.data import Table, Variable, DiscreteVariable
from Orange.evaluation import Results
from Orange.evaluation.scoring import Score, TargetScore, RegressionScore, R2
from Orange.regression import Model as RegModel
Expand All @@ -19,7 +20,7 @@ def permutation_feature_importance(
score: Score,
n_repeats: int = 5,
progress_callback: Callable = None
):
) -> np.ndarray:
"""
Function calculates feature importance of a model for a given data.
Expand Down Expand Up @@ -174,3 +175,46 @@ def _calculate_permutation_scores(

progress_callback(1)
return scores


def individual_condition_expectation(
        model: SklModel,
        data: Table,
        feature: Variable,
        grid_resolution: int = 1000,
        kind: str = "both",
        progress_callback: Callable = dummy_callback
) -> Dict[str, np.ndarray]:
    """
    Compute ICE (individual conditional expectation) curves for one feature.

    The computation is delegated to scikit-learn's ``partial_dependence``,
    which is run on the wrapped ``model.skl_model`` and ``data.X``.

    Parameters
    ----------
    model : SklModel
        Fitted model wrapping a scikit-learn estimator.
    data : Table
        Data to evaluate. It is converted to the model's domain when
        ``_check_model`` reports that preprocessing is needed.
    feature : Variable
        Feature to vary; looked up by name among the data attributes.
    grid_resolution : int
        Forwarded to ``partial_dependence``.
    kind : str
        Forwarded to ``partial_dependence`` ("average", "individual" or
        "both"). Only the entries sklearn returns for the requested kind
        are included in the result.
    progress_callback : Callable
        Called with 0, 0.1 and 1 as the computation advances.

    Returns
    -------
    Dict[str, np.ndarray]
        ``"values"``: the grid of feature values;
        ``"average"`` (if computed): one row per target;
        ``"individual"`` (if computed): one block of per-instance curves
        per target.
        For a two-valued discrete class the single output returned by
        sklearn is expanded to two rows/blocks, ``[1 - p, p]``.

    Raises
    ------
    AssertionError
        If ``feature`` is not among the data attributes or ``model`` is not
        an ``SklModel``. ``_check_data`` / ``_check_model`` may raise their
        own errors (not visible here).
    """
    progress_callback(0)
    _check_data(data)
    needs_pp = _check_model(model, data)
    if needs_pp:
        data = model.data_to_model_domain(data)

    assert feature.name in [a.name for a in data.domain.attributes]
    feature_index = data.domain.index(feature.name)

    assert isinstance(model, SklModel), f"Model ({model}) is not supported."
    progress_callback(0.1)

    dep = partial_dependence(model.skl_model,
                             data.X,
                             [feature_index],
                             grid_resolution=grid_resolution,
                             kind=kind)

    is_binary = data.domain.has_discrete_class and \
        len(data.domain.class_var.values) == 2

    def per_class(arr: np.ndarray) -> np.ndarray:
        # For binary targets sklearn yields a single output; stack its
        # complement in front so that each class value has its own row.
        if is_binary:
            return np.vstack([1 - arr, arr])
        return arr

    # Newer scikit-learn releases expose the grid as "grid_values" and
    # deprecate/remove "values"; fall back to the legacy key otherwise.
    grid_key = "grid_values" if "grid_values" in dep else "values"

    # Only copy what sklearn actually returned: kind="individual" has no
    # "average" entry and kind="average" has no "individual" entry.
    results = {}
    if "average" in dep:
        results["average"] = per_class(dep["average"])
    results["values"] = dep[grid_key][0]
    if "individual" in dep:
        results["individual"] = per_class(dep["individual"])

    progress_callback(1)

    return results
102 changes: 99 additions & 3 deletions orangecontrib/explain/tests/test_inspection.py
Expand Up @@ -3,19 +3,19 @@
import pkg_resources

import numpy as np
from sklearn.inspection import permutation_importance
from sklearn.inspection import permutation_importance, partial_dependence

from Orange.base import Model
from Orange.classification import NaiveBayesLearner, RandomForestLearner, \
LogisticRegressionLearner, TreeLearner
from Orange.data import Table, Domain
from Orange.data import Table, Domain, DiscreteVariable
from Orange.data.table import DomainTransformationError
from Orange.evaluation import CA, MSE, AUC
from Orange.regression import RandomForestRegressionLearner, \
TreeLearner as TreeRegressionLearner

from orangecontrib.explain.inspection import permutation_feature_importance, \
_wrap_score, _check_model
_wrap_score, _check_model, individual_condition_expectation


def _permutation_feature_importance_skl(
Expand Down Expand Up @@ -284,5 +284,101 @@ def test_sparse_data(self):
)


class TestIndividualConditionalExpectation(unittest.TestCase):
    """Tests for ``individual_condition_expectation`` on bundled datasets."""

    @classmethod
    def setUpClass(cls):
        # Datasets are loaded once and shared by all tests in the class.
        cls.iris = Table.from_file("iris")
        cls.heart = Table.from_file("heart_disease")
        cls.housing = Table.from_file("housing")

    def _binary_iris(self):
        """Return the first 100 iris rows with a two-valued class variable."""
        subset = self.iris[:100]
        two_valued = DiscreteVariable(
            "iris", subset.domain.class_var.values[:2]
        )
        return subset.transform(Domain(subset.domain.attributes, two_valued))

    def _assert_result_shapes(self, res, n_targets, n_rows, n_grid):
        """Check the result type and the shape of every returned array."""
        self.assertIsInstance(res, dict)
        self.assertEqual(res["average"].shape, (n_targets, n_grid))
        self.assertEqual(res["individual"].shape, (n_targets, n_rows, n_grid))
        self.assertEqual(res["values"].shape, (n_grid,))

    def test_discrete_class(self):
        data = self._binary_iris()
        model = RandomForestLearner(n_estimators=10, random_state=0)(data)
        res = individual_condition_expectation(model, data, data.domain[0])
        self._assert_result_shapes(res, 2, 100, 28)

    def test_discrete_class_result_values(self):
        data = self._binary_iris()
        model1 = RandomForestLearner(n_estimators=10, random_state=0)(data)

        # Flip the labels and refit: the second model's output should match
        # the complement row produced for the first model.
        data.Y = np.abs(data.Y - 1)
        model2 = RandomForestLearner(n_estimators=10, random_state=0)(data)

        res = individual_condition_expectation(model1, data, data.domain[0])
        dep1 = partial_dependence(model1.skl_model, data.X, [0], kind="both")
        dep2 = partial_dependence(model2.skl_model, data.X, [0], kind="both")
        for key in ("average", "individual"):
            np.testing.assert_array_almost_equal(res[key][:1], dep2[key])
            np.testing.assert_array_almost_equal(res[key][1:], dep1[key])

    def test_continuous_class(self):
        data = self.housing
        learner = RandomForestRegressionLearner(n_estimators=10,
                                                random_state=0)
        model = learner(data)
        res = individual_condition_expectation(model, data, data.domain[0])
        self._assert_result_shapes(res, 1, 506, 504)

    def test_multi_class(self):
        data = self.iris
        model = RandomForestLearner(n_estimators=10, random_state=0)(data)
        res = individual_condition_expectation(model, data, data.domain[0])
        self._assert_result_shapes(res, 3, 150, 35)

    def test_mixed_features(self):
        data = self.heart
        model = RandomForestLearner(n_estimators=10, random_state=0)(data)
        res = individual_condition_expectation(model, data, data.domain[0])
        self._assert_result_shapes(res, 2, 303, 41)

    def _test_sklearn(self):
        # Not collected by unittest (leading underscore); a manual helper
        # that plots sklearn's own partial dependence display.
        from matplotlib import pyplot as plt
        from sklearn.ensemble import RandomForestClassifier, \
            RandomForestRegressor
        from sklearn.inspection import PartialDependenceDisplay

        X = self.housing.X
        y = self.housing.Y
        model = RandomForestRegressor(random_state=0)

        # Classification variant, kept for manual experiments:
        # X = self.iris.X[:100]
        # y = self.iris.Y[:100]
        # y = np.abs(y - 1)
        # model = RandomForestClassifier(random_state=0)
        model.fit(X, y)
        last_feature = X.shape[1] - 1
        display = PartialDependenceDisplay.from_estimator(
            model,
            X,
            [last_feature],
            target=0,
            kind="both",
            centered=True,
            subsample=1000,
            # grid_resolution=100,
            random_state=0,
        )

        plt.show()


# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
unittest.main()
23 changes: 23 additions & 0 deletions orangecontrib/explain/widgets/icons/ICE.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 334edee

Please sign in to comment.