Skip to content

Commit

Permalink
Remove that code, general cleanup.
Browse files Browse the repository at this point in the history
Signed-off-by: Terence Parr <parrt@antlr.org>
  • Loading branch information
parrt committed Dec 27, 2022
1 parent 4e536b6 commit b74db63
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 156 deletions.
1 change: 1 addition & 0 deletions dtreeviz/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .version import __version__

# NEW API
# import dtreeviz
# call m = dtreeviz.model(...) then m.view() etc...
from dtreeviz.utils import DTreeVizRender
from dtreeviz.trees import DTreeVizAPI, model
Expand Down
43 changes: 6 additions & 37 deletions dtreeviz/classifiers.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from typing import Tuple
import numpy as np
import pandas as pd

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import ImageColor
from colour import Color
from matplotlib import patches as patches
from matplotlib.collections import PatchCollection
from colour import Color
from PIL import ImageColor

from dtreeviz.colors import adjust_colors, GREY
from dtreeviz import utils
from dtreeviz.colors import adjust_colors
from dtreeviz.utils import add_classifier_legend


def decision_boundaries(model, X: np.ndarray, y: np.ndarray,
Expand Down Expand Up @@ -536,34 +536,3 @@ def _predict_proba(model, X):

# sklearn etc...
return model.predict_proba(X)


def add_classifier_legend(ax, class_names, class_values, facecolors, target_name,
                          colors, fontsize=10, fontname='Arial'):
    """
    Attach a class legend to `ax`, with one colored rectangle per entry of `class_values`.

    :param ax: object whose legend() method is called to build the legend
               (presumably a matplotlib axes -- confirm against callers)
    :param class_names: indexed by class value to obtain each entry's label
    :param class_values: iterable of class values; one legend entry is created for each
    :param facecolors: indexed by class value to obtain each rectangle's fill color
    :param target_name: text used as the legend title
    :param colors: mapping read for the keys 'rect_edge', 'legend_edge', 'legend_title' and 'text'
    :param fontsize: size applied to the legend title and to every entry label
    :param fontname: font family applied to the legend title
    """
    # Build the legend handles: a small rectangle per class value.
    swatches = [
        patches.Rectangle((0, 0), 20, 10, linewidth=.4, edgecolor=colors['rect_edge'],
                          facecolor=facecolors[value], label=class_names[value])
        for value in class_values
    ]

    legend = ax.legend(handles=swatches,
                       frameon=True,
                       shadow=False,
                       fancybox=True,
                       handletextpad=.35,
                       borderpad=.8,
                       bbox_to_anchor=(1.0, 1.0),
                       edgecolor=colors['legend_edge'])

    title_font = {'size': fontsize,
                  'weight': 'bold',
                  'family': fontname}
    legend.set_title(target_name, prop=title_font)

    legend.get_frame().set_linewidth(.5)

    # Style the title through a single handle.
    title = legend.get_title()
    title.set_color(colors['legend_title'])
    title.set_fontsize(fontsize)
    title.set_fontname(fontname)

    # Entry labels share the text color and font size.
    for label in legend.get_texts():
        label.set_color(colors['text'])
        label.set_fontsize(fontsize)
61 changes: 0 additions & 61 deletions dtreeviz/compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def rtreeviz_bivar_heatmap(tree_model,
Show tesselated 2D feature space for bivariate regression tree. X_train can
have lots of features but features lists indexes of 2 features to train tree with.
"""

warnings.warn("rtreeviz_bivar_heatmap() function is deprecated starting from version 2.0. \n "
"For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.rtree_feature_space(...)",
DeprecationWarning, stacklevel=2)
Expand Down Expand Up @@ -93,7 +92,6 @@ def rtreeviz_bivar_3D(tree_model,
Show 3D feature space for bivariate regression tree. X_train should have
just the 2 variables used for training.
"""

warnings.warn("rtreeviz_bivar_3D() function is deprecated starting from version 2.0. \n "
"For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.rtree_feature_space3D(...)",
DeprecationWarning, stacklevel=2)
Expand All @@ -116,7 +114,6 @@ def ctreeviz_univar(tree_model,
show={'title', 'legend', 'splits'},
colors=None,
ax=None):

warnings.warn("ctreeviz_univar() function is deprecated starting from version 2.0. \n "
"For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.ctree_feature_space(...)",
DeprecationWarning, stacklevel=2)
Expand Down Expand Up @@ -248,7 +245,6 @@ def dtreeviz(tree_model,
:param scale: Default is 1.0. Scale the width, height of the overall SVG preserving aspect ratio
:return: A string in graphviz DOT language that describes the decision tree.
"""

warnings.warn("dtreeviz() function is deprecated starting from version 2.0. \n "
"For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.view()",
DeprecationWarning, stacklevel=2)
Expand Down Expand Up @@ -345,7 +341,6 @@ def viz_leaf_samples(tree_model,
:param figsize: optional (width, height) in inches for the entire plot
:param ax: optional matplotlib "axes" to draw into
"""

warnings.warn("viz_leaf_samples() function is deprecated starting from version 2.0. \n "
"For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.leaf_sizes()",
DeprecationWarning, stacklevel=2)
Expand Down Expand Up @@ -494,30 +489,6 @@ def ctreeviz_leaf_samples(tree_model,
model.ctree_leaf_distributions(display_type, plot_ylim, colors, fontsize, fontname, grid, figsize, ax)


def _get_leaf_target_input(shadow_tree: ShadowDecTree,
                           precision: int):
    """
    Gather, for every leaf of `shadow_tree`, the data needed to draw its target values.

    :param shadow_tree: read for its `leaves` and `y_train` attributes
    :param precision: forwarded to myround() when formatting each leaf's mean target as a label
    :return: tuple (x, y, means, means_range, x_labels) where
             x holds jittered horizontal positions centered on each leaf's index,
             y holds the matching target values,
             means holds [mean, mean] per leaf,
             means_range holds [i - 3*sigma, i + 3*sigma] per leaf, and
             x_labels holds the rounded mean of each leaf as a string.
    """
    x = []
    y = []
    means = []
    means_range = []
    x_labels = []
    sigma = .05
    for i, node in enumerate(shadow_tree.leaves):
        leaf_target = shadow_tree.y_train[node.samples()]
        leaf_target_mean = np.mean(leaf_target)

        # Use a private generator seeded with 0 for every leaf: it produces the same
        # jitter as calling np.random.seed(0) first, but leaves the process-wide
        # NumPy random state untouched for the caller.
        rng = np.random.RandomState(0)
        jitter = rng.normal(i, sigma, size=len(leaf_target))

        x.extend(jitter)
        y.extend(leaf_target)
        means.append([leaf_target_mean, leaf_target_mean])
        means_range.append([i - (sigma * 3), i + (sigma * 3)])
        x_labels.append(f"{myround(leaf_target_mean, precision)}")

    return x, y, means, means_range, x_labels


def viz_leaf_target(tree_model,
X_train: (pd.DataFrame, np.ndarray) = None,
y_train: (pd.DataFrame, np.ndarray) = None,
Expand Down Expand Up @@ -622,7 +593,6 @@ def describe_node_sample(tree_model,
:return: pd.DataFrame
Node training samples' stats
"""

warnings.warn("describe_node_sample() function is deprecated starting from version 2.0. \n "
"For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.node_stats()",
DeprecationWarning, stacklevel=2)
Expand Down Expand Up @@ -675,7 +645,6 @@ def explain_prediction_path(tree_model,
Required in case of tree ensemble. Specify the tree index to interpret.
"""

shadow_tree = ShadowDecTree.get_shadow_tree(tree_model, X_train, y_train, feature_names, None, class_names,
tree_index)
model = DTreeVizAPI(shadow_tree)
Expand All @@ -691,33 +660,3 @@ def explain_prediction_path(tree_model,
"For the same functionality, please use this code instead: \n m = dtreeviz.model(...) \n m.explain_prediction_path()",
DeprecationWarning, stacklevel=2)
return model.explain_prediction_path(x)


def model(model,
          X_train,
          y_train,
          tree_index: int = None,
          feature_names: List[str] = None,
          target_name: str = None,
          class_names: (List[str], Mapping[int, str]) = None
          ):
    """
    Wrap a tree-based model and its training data in a DTreeVizAPI adaptor.

    The adaptor gives the rest of the dtreeviz library one consistent interface across the
    supported tree libraries. Call methods such as v.view(), v.explain_prediction_path() or
    v.rtree_feature_space3D() on the returned adaptor v.

    :param model: A tree-based model from a supported decision tree library, such as sklearn, XGBoost, and TensorFlow.
    :param X_train: Features used to train model; 2D array-like object of shape (n_samples, n_features).
    :param y_train: Classifier or regressor target used to train model; 1D array-like object of shape (n_samples, 1).
    :param tree_index: Index (from 0) of tree if model is an ensemble of trees like a random forest.
    :param feature_names: Names of features in the same order of X_train.
    :param target_name: The (string) name of the target variable; e.g., for a house price regressor, this might be "price".
    :param class_names: For classifiers, the names associated with the labels.
    :return: a DTreeVizAPI object that provides the main API for dtreeviz (version 2.0.0+);
             e.g., call the view() method on the return object to display it in a notebook.
    """
    # Build the library-independent shadow tree, then hand it to the API wrapper.
    return DTreeVizAPI(
        ShadowDecTree.get_shadow_tree(model, X_train, y_train, feature_names, target_name, class_names,
                                      tree_index)
    )
5 changes: 0 additions & 5 deletions dtreeviz/interpretation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def explain_prediction_plain_english(shadow_tree: ShadowDecTree,
:return: str
Prediction path explanation in plain english.
"""

node_feature_index = shadow_tree.get_features()
feature_names = shadow_tree.feature_names
node_threshold = shadow_tree.get_thresholds()
Expand All @@ -42,7 +41,6 @@ def explain_prediction_plain_english(shadow_tree: ShadowDecTree,
# TODO - refactor this logic and find a way to make it simpler
feature_smaller_values = {}
feature_bigger_values = {}
# feature_categorical_value = {}
feature_categorical_value = defaultdict(lambda: set())
feature_categorical_value_not_in = defaultdict(lambda: set())

Expand Down Expand Up @@ -86,7 +84,6 @@ def explain_prediction_plain_english(shadow_tree: ShadowDecTree,
prediction_path_output += feature_range + "\n"

for feature_name in set(list(feature_categorical_value.keys()) + list(feature_categorical_value_not_in.keys())):
# prediction_path_output += f"{feature_name} in {feature_categorical_value[feature_name]} \n"
prediction_path_output += f"{feature_name}{' in ' + str(feature_categorical_value[feature_name]) if feature_name in feature_categorical_value else ''}" \
f"{' not in ' + str(feature_categorical_value_not_in[feature_name]) if feature_name in feature_categorical_value_not_in else ''} \n"

Expand Down Expand Up @@ -152,5 +149,3 @@ def explain_prediction_sklearn_default(shadow_tree: ShadowDecTree,
ax.set_xlabel("feature importance", fontsize=fontsize, fontname=fontname, color=colors['axis_label'])
ax.grid(b=grid)
return ax


20 changes: 0 additions & 20 deletions dtreeviz/models/shadow_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,6 @@ def get_split_node_heights(self, X_train, y_train, nbins) -> Mapping[int, int]:

bins = np.linspace(overall_feature_range[0],
overall_feature_range[1], nbins + 1)
# bins = np.arange(overall_feature_range[0],
# overall_feature_range[1] + binwidth, binwidth)
# print(f"\tlen(bins)={len(bins):2d} bins={bins}")
X, y = X_feature[node.samples()], y_train[node.samples()]
X_hist = [X[y == cl] for cl in class_values]
Expand Down Expand Up @@ -386,7 +384,6 @@ def get_leaf_sample_counts(self, min_samples=0, max_samples=None):
:return: tuple
Contains a numpy array of leaf ids and an array of leaf samples
"""

max_samples = max_samples if max_samples else max([node.nsamples() for node in self.leaves])
leaf_samples = [(node.id, node.nsamples()) for node in self.leaves if
min_samples <= node.nsamples() <= max_samples]
Expand All @@ -399,7 +396,6 @@ def get_leaf_criterion(self):
For classification, supported criteria are “gini” for the Gini impurity and “entropy” for the information gain.
For regression, supported criteria are “mse”, “friedman_mse”, “mae”.
"""

leaf_criterion = [(node.id, node.criterion()) for node in self.leaves]
x, y = zip(*leaf_criterion)
return np.array(x), np.array(y)
Expand All @@ -410,7 +406,6 @@ def get_leaf_sample_counts_by_class(self):
:return: tuple
Contains a list of leaf ids and two lists of leaf samples (one for each class)
"""

leaf_samples = [(node.id, node.n_sample_classes()[0], node.n_sample_classes()[1]) for node in self.leaves]
index, leaf_sample_0, leaf_samples_1 = zip(*leaf_samples)
return index, leaf_sample_0, leaf_samples_1
Expand Down Expand Up @@ -507,24 +502,20 @@ def __init__(self, shadow_tree: ShadowDecTree, id: int, left=None, right=None, l

def split(self) -> (int, float):
    """Return what the owning shadow tree reports as the split/threshold value for this node's id."""
    tree = self.shadow_tree
    return tree.get_node_split(self.id)

def feature(self) -> int:
    """Return the feature index that the owning shadow tree reports for this node's id."""
    tree = self.shadow_tree
    return tree.get_node_feature(self.id)

def feature_name(self) -> (str, None):
    """Return the name of the feature used at this node, or None when the tree has no feature names."""
    names = self.shadow_tree.feature_names
    return None if names is None else names[self.feature()]

def samples(self) -> List[int]:
    """Return this node's entry (looked up by node id) from the shadow tree's get_node_samples() result."""
    samples_by_node = self.shadow_tree.get_node_samples()
    return samples_by_node[self.id]

def nsamples(self) -> int:
Expand All @@ -533,7 +524,6 @@ def nsamples(self) -> int:
used to compute the predicted value or class. If this is an internal node, it is the number of samples used
to compute the split point.
"""

return self.shadow_tree.get_node_nsamples(self.id)

# TODO
Expand All @@ -544,7 +534,6 @@ def n_sample_classes(self):
Returns the sample count values for each classes.
"""

samples = np.array(self.samples())
if samples.size == 0:
return [0, 0]
Expand All @@ -565,7 +554,6 @@ def criterion(self):

def split_samples(self) -> Tuple[np.ndarray, np.ndarray]:
    """Return the sample indexes falling to the left and to the right of this node's split value."""
    tree = self.shadow_tree
    return tree.get_split_samples(self.id)

def isleaf(self) -> bool:
Expand All @@ -582,14 +570,8 @@ def prediction(self) -> (Number, None):
If the node is an internal node, returns None
"""

if not self.isleaf():
return None
# if self.isclassifier():
# counts = self.shadow_tree.get_prediction_value(self.id)
# return np.argmax(counts)
# else:
# return self.shadow_tree.get_prediction_value(self.id)
return self.shadow_tree.get_prediction(self.id)

def prediction_name(self) -> (str, None):
Expand All @@ -599,7 +581,6 @@ def prediction_name(self) -> (str, None):
Return prediction class or value otherwise.
"""

if self.isclassifier():
if self.shadow_tree.class_names is not None:
return self.shadow_tree.class_names[self.prediction()]
Expand All @@ -609,7 +590,6 @@ def class_counts(self) -> (List[int], None):
"""
If this tree model is a classifier, return a list with the count associated with each class.
"""

if self.isclassifier():
if self.shadow_tree.get_class_weight() is None:
# return np.array(np.round(self.shadow_tree.tree_model.tree_.value[self.id][0]), dtype=int)
Expand Down
5 changes: 1 addition & 4 deletions dtreeviz/models/tensorflow_decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,20 +148,18 @@ def get_node_split(self, id) -> (int, float):
def get_node_feature(self, id) -> int:
    """Return the entry for node `id` from this tree's get_features() result."""
    features_by_node = self.get_features()
    return features_by_node[id]

# TODO check if we can pun this method in the super class
# TODO check if we can put this method in the super class
def get_node_nsamples_by_class(self, id):
    """
    Return the first two per-class sample counts of the node whose id equals `id`.

    Every internal node and leaf is scanned for a matching id. For a non-classifier
    tree nothing is computed and the method falls through, returning None.
    """
    candidates = self.internal + self.leaves
    if not self.is_classifier():
        return None
    matches = [candidate.n_sample_classes() for candidate in candidates if candidate.id == id]
    counts = matches[0]
    return counts[0], counts[1]

# TODO implement for regression tree
def get_prediction(self, id):
    """
    Return the prediction stored at node `id`.

    For a classifier this is the index of the largest entry in the node's
    `value.probability`; otherwise it is the node's `value.value`.
    """
    if not self.is_classifier():
        return self.tree_nodes[id].value.value
    class_probabilities = self.tree_nodes[id].value.probability
    return np.argmax(class_probabilities)
# raise VisualisationNotYetSupportedError("get_prediction()", "TensorFlow Decision Forests2")

def is_categorical_split(self, id) -> bool:
node_condition = self.tree_nodes[id].condition
Expand Down Expand Up @@ -201,7 +199,6 @@ def _get_nodes_info(self):
Get individual node info and left/right child node. We are using a dict as a data structure to keep
the left and right child node info.
"""

tree_nodes = defaultdict(lambda: None)
children_left = defaultdict(lambda: -1)
children_right = defaultdict(lambda: -1)
Expand Down

0 comments on commit b74db63

Please sign in to comment.