Skip to content

Commit

Permalink
Enhancement in CF Generation
Browse files Browse the repository at this point in the history
  • Loading branch information
giandos200 committed Jan 13, 2022
1 parent e053592 commit 86dc77f
Show file tree
Hide file tree
Showing 22 changed files with 132 additions and 149 deletions.
2 changes: 1 addition & 1 deletion dice_ml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .data import Data
from .dice import Dice
from .model import Model
from .dice import Dice

__all__ = ["Data",
"Model",
Expand Down
9 changes: 4 additions & 5 deletions dice_ml/counterfactual_explanations.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import json
import os

import jsonschema
import os

from dice_ml.constants import _SchemaVersions
from dice_ml.diverse_counterfactuals import (CounterfactualExamples,
_DiverseCFV2SchemaConstants)
from dice_ml.diverse_counterfactuals import CounterfactualExamples
from dice_ml.utils.exception import UserConfigValidationException
from dice_ml.diverse_counterfactuals import _DiverseCFV2SchemaConstants
from dice_ml.constants import _SchemaVersions


class _CommonSchemaConstants:
Expand Down
7 changes: 3 additions & 4 deletions dice_ml/data_interfaces/private_data_interface.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""Module containing meta data information about private data."""

import collections
import logging
import sys

import numpy as np
import pandas as pd
import numpy as np
import collections
import logging

from dice_ml.data_interfaces.base_data_interface import _BaseData

Expand Down
10 changes: 4 additions & 6 deletions dice_ml/data_interfaces/public_data_interface.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
"""Module containing all required information about the interface between raw (or transformed)
public data and DiCE explainers."""

import pandas as pd
import numpy as np
import logging
from collections import defaultdict

import numpy as np
import pandas as pd

from dice_ml.data_interfaces.base_data_interface import _BaseData
from dice_ml.utils.exception import (SystemException,
UserConfigValidationException)
from dice_ml.utils.exception import SystemException, UserConfigValidationException


class PublicData(_BaseData):
Expand Down Expand Up @@ -260,7 +258,7 @@ def get_valid_feature_range(self, feature_range_input, normalized=True):
"""
feature_range = {}

for _, feature_name in enumerate(self.feature_names):
for idx, feature_name in enumerate(self.feature_names):
feature_range[feature_name] = []
if feature_name in self.continuous_feature_names:
max_value = self.data_df[feature_name].max()
Expand Down
10 changes: 4 additions & 6 deletions dice_ml/dice.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
such as RandomSampling, DiCEKD or DiCEGenetic"""

from dice_ml.constants import BackEndTypes, SamplingStrategy
from dice_ml.data_interfaces.private_data_interface import PrivateData
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
from dice_ml.utils.exception import UserConfigValidationException
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
from dice_ml.data_interfaces.private_data_interface import PrivateData


class Dice(ExplainerBase):
Expand Down Expand Up @@ -67,14 +67,12 @@ def decide(model_interface, method):

elif model_interface.backend == BackEndTypes.Tensorflow1:
# pretrained Keras Sequential model with Tensorflow 1.x backend
from dice_ml.explainer_interfaces.dice_tensorflow1 import \
DiceTensorFlow1
from dice_ml.explainer_interfaces.dice_tensorflow1 import DiceTensorFlow1
return DiceTensorFlow1

elif model_interface.backend == BackEndTypes.Tensorflow2:
# pretrained Keras Sequential model with Tensorflow 2.x backend
from dice_ml.explainer_interfaces.dice_tensorflow2 import \
DiceTensorFlow2
from dice_ml.explainer_interfaces.dice_tensorflow2 import DiceTensorFlow2
return DiceTensorFlow2

elif model_interface.backend == BackEndTypes.Pytorch:
Expand Down
7 changes: 2 additions & 5 deletions dice_ml/diverse_counterfactuals.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import pandas as pd
import copy
import json

import pandas as pd

from dice_ml.constants import ModelTypes, _SchemaVersions
from dice_ml.utils.serialize import DummyDataInterface
from dice_ml.constants import _SchemaVersions, ModelTypes


class _DiverseCFV1SchemaConstants:
Expand Down Expand Up @@ -117,7 +115,6 @@ def _visualize_internal(self, display_sparse_df=True, show_only_changes=False,

def visualize_as_dataframe(self, display_sparse_df=True, show_only_changes=False):
from IPython.display import display

# original instance
print('Query instance (original outcome : %i)' % round(self.test_pred))
display(self.test_instance_df) # works only in Jupyter notebook
Expand Down
13 changes: 8 additions & 5 deletions dice_ml/explainer_interfaces/dice_KD.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
Module to generate counterfactual explanations from a KD-Tree
This code is similar to 'Interpretable Counterfactual Explanations Guided by Prototypes': https://arxiv.org/pdf/1907.02584.pdf
"""
import copy
import timeit

from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
import numpy as np
import timeit
import pandas as pd
import copy

from dice_ml import diverse_counterfactuals as exp
from dice_ml.constants import ModelTypes
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase


class DiceKD(ExplainerBase):
Expand Down Expand Up @@ -260,10 +259,14 @@ def find_counterfactuals(self, data_df_copy, query_instance, query_instance_orig
if total_cfs_found < total_CFs:
self.elapsed = timeit.default_timer() - start_time
m, s = divmod(self.elapsed, 60)
print('Only %d (required %d) ' % (total_cfs_found, self.total_CFs),
print('Only %d (required %d) ' % (total_cfs_found, total_CFs),
'Diverse Counterfactuals found for the given configuation, perhaps ',
'change the query instance or the features to vary...' '; total time taken: %02d' % m,
'min %02d' % s, 'sec')
elif total_cfs_found == 0:
print(
'No Counterfactuals found for the given configuration, perhaps try with different parameters...',
'; total time taken: %02d' % m, 'min %02d' % s, 'sec')
else:
print('Diverse Counterfactuals found! total time taken: %02d' % m, 'min %02d' % s, 'sec')

Expand Down
38 changes: 22 additions & 16 deletions dice_ml/explainer_interfaces/dice_genetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,16 @@
Module to generate diverse counterfactual explanations based on genetic algorithm
This code is similar to 'GeCo: Quality Counterfactual Explanations in Real Time': https://arxiv.org/pdf/2101.01292.pdf
"""
import copy
import random
import timeit

from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
import numpy as np
import pandas as pd
import random
import timeit
import copy
from sklearn.preprocessing import LabelEncoder

from dice_ml import diverse_counterfactuals as exp
from dice_ml.constants import ModelTypes
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase


class DiceGenetic(ExplainerBase):
Expand Down Expand Up @@ -116,9 +115,8 @@ def do_random_init(self, num_inits, features_to_vary, query_instance, desired_cl
def do_KD_init(self, features_to_vary, query_instance, cfs, desired_class, desired_range):
cfs = self.label_encode(cfs)
cfs = cfs.reset_index(drop=True)

self.cfs = np.zeros((self.population_size, self.data_interface.number_of_features))
for kx in range(self.population_size):
row = []
for kx in range(self.population_size*5):
if kx >= len(cfs):
break
one_init = np.zeros(self.data_interface.number_of_features)
Expand All @@ -143,16 +141,21 @@ def do_KD_init(self, features_to_vary, query_instance, cfs, desired_class, desir
one_init[jx] = query_instance[jx]
else:
one_init[jx] = np.random.choice(self.feature_range[feature])
self.cfs[kx] = one_init
t = tuple(one_init)
if t not in row:
row.append(t)
if len(row) == self.population_size:
break
kx += 1
self.cfs = np.array(row)

new_array = [tuple(row) for row in self.cfs]
uniques = np.unique(new_array, axis=0)

if len(uniques) != self.population_size:
#if len(self.cfs) > self.population_size:
# pass
if len(self.cfs) != self.population_size:
print("Pericolo Loop infinito....!!!!")
remaining_cfs = self.do_random_init(
self.population_size - len(uniques), features_to_vary, query_instance, desired_class, desired_range)
self.cfs = np.concatenate([uniques, remaining_cfs])
self.population_size - len(self.cfs), features_to_vary, query_instance, desired_class, desired_range)
self.cfs = np.concatenate([self.cfs, remaining_cfs])

def do_cf_initializations(self, total_CFs, initialization, algorithm, features_to_vary, desired_range,
desired_class,
Expand Down Expand Up @@ -260,7 +263,7 @@ def _generate_counterfactuals(self, query_instance, total_CFs, initialization="k
(see diverse_counterfactuals.py).
"""

self.population_size = 10 * total_CFs
self.population_size = 3 * total_CFs

self.start_time = timeit.default_timer()

Expand Down Expand Up @@ -514,6 +517,9 @@ def find_counterfactuals(self, query_instance, desired_range, desired_class,
if len(self.final_cfs) == self.total_CFs:
print('Diverse Counterfactuals found! total time taken: %02d' %
m, 'min %02d' % s, 'sec')
elif len(self.final_cfs) == 0:
print('No Counterfactuals found for the given configuration, perhaps try with different parameters...',
'; total time taken: %02d' % m, 'min %02d' % s, 'sec')
else:
print('Only %d (required %d) ' % (len(self.final_cfs), self.total_CFs),
'Diverse Counterfactuals found for the given configuation, perhaps ',
Expand Down
10 changes: 5 additions & 5 deletions dice_ml/explainer_interfaces/dice_pytorch.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
"""
Module to generate diverse counterfactual explanations based on PyTorch framework
"""
import copy
import random
import timeit
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
import torch

import numpy as np
import torch
import random
import timeit
import copy

from dice_ml import diverse_counterfactuals as exp
from dice_ml.counterfactual_explanations import CounterfactualExplanations
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase


class DicePyTorch(ExplainerBase):
Expand Down
17 changes: 11 additions & 6 deletions dice_ml/explainer_interfaces/dice_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
Module to generate diverse counterfactual explanations based on random sampling.
A simple implementation.
"""
import random
import timeit

from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
import numpy as np
import pandas as pd
import random
import timeit

from dice_ml import diverse_counterfactuals as exp
from dice_ml.constants import ModelTypes
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase


class DiceRandom(ExplainerBase):
Expand Down Expand Up @@ -109,11 +108,17 @@ class of query_instance for binary classification.
cfs_df = None
candidate_cfs = pd.DataFrame(
np.repeat(query_instance.values, sample_size, axis=0), columns=query_instance.columns)
# Loop to change one feature at a time, then two features, and so on.
# Loop to change one feature at a time ##->(NOT TRUE), then two features, and so on.
for num_features_to_vary in range(1, len(self.features_to_vary)+1):
# The commented-out lines below allow more features to change as num_features_to_vary increases;
# if you enable them, use .loc instead of .at. They are deliberately left commented out so that you can choose.
# That variant is slower, but more complete, and still faster than the genetic/KD-tree methods.
# selected_features = np.random.choice(self.features_to_vary, (sample_size, num_features_to_vary), replace=True)
selected_features = np.random.choice(self.features_to_vary, (sample_size, 1), replace=True)
for k in range(sample_size):
candidate_cfs.at[k, selected_features[k][0]] = random_instances.at[k, selected_features[k][0]]
candidate_cfs.at[k, selected_features[k][0]] = random_instances._get_value(k, selected_features[k][0])
# If you only want to change one feature, you should use _get_value
# candidate_cfs.iloc[k][selected_features[k]]=random_instances.iloc[k][selected_features[k]]
scores = self.predict_fn(candidate_cfs)
validity = self.decide_cf_validity(scores)
if sum(validity) > 0:
Expand Down
12 changes: 6 additions & 6 deletions dice_ml/explainer_interfaces/dice_tensorflow1.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
"""
Module to generate diverse counterfactual explanations based on tensorflow 1.x
"""
import collections
import copy
import random
import timeit
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
import tensorflow as tf

import numpy as np
import tensorflow as tf
import random
import collections
import timeit
import copy

from dice_ml import diverse_counterfactuals as exp
from dice_ml.counterfactual_explanations import CounterfactualExplanations
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase


class DiceTensorFlow1(ExplainerBase):
Expand Down
12 changes: 6 additions & 6 deletions dice_ml/explainer_interfaces/dice_tensorflow2.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
"""
Module to generate diverse counterfactual explanations based on tensorflow 2.x
"""
import copy
import random
import timeit
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase
import tensorflow as tf

import numpy as np
import tensorflow as tf
import random
import timeit
import copy

from dice_ml import diverse_counterfactuals as exp
from dice_ml.counterfactual_explanations import CounterfactualExplanations
from dice_ml.explainer_interfaces.explainer_base import ExplainerBase


class DiceTensorFlow2(ExplainerBase):
Expand Down Expand Up @@ -177,7 +177,7 @@ def do_cf_initializations(self, total_CFs, algorithm, features_to_vary):
# CF initialization
if len(self.cfs) != self.total_CFs:
self.cfs = []
for _ in range(self.total_CFs):
for ix in range(self.total_CFs):
one_init = [[]]
for jx in range(self.minx.shape[1]):
one_init[0].append(np.random.uniform(self.minx[0][jx], self.maxx[0][jx]))
Expand Down

0 comments on commit 86dc77f

Please sign in to comment.