Skip to content
This repository has been archived by the owner on May 31, 2023. It is now read-only.

Commit

Permalink
final draft of paper, fix normalized md
Browse files Browse the repository at this point in the history
normalized mean difference now ranges from
-1 to 1
  • Loading branch information
cosmicBboy committed Sep 22, 2017
1 parent 75482f1 commit 79cb954
Show file tree
Hide file tree
Showing 7 changed files with 319 additions and 221 deletions.
513 changes: 298 additions & 215 deletions paper/Evaluating Themis-ml.ipynb

Large diffs are not rendered by default.

Binary file modified paper/IMG/fairness_aware_comparison.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/IMG/fairness_utility_tradeoff.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/IMG/logistic_regression_fairness_utility_tradeoff.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified paper/main.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion paper/main.tex
Expand Up @@ -607,7 +607,7 @@ \section{Evaluating Themis-ml}
\textbf{nmd (\%)} & \textbf{nmd 95\% CI}\\
\hline
\textbf{female} & 7.48 & (1.35, 13.61) & 7.73 & (1.39, 14.06) \\
\textbf{foreign worker} & 19.93 & (4.91, 34.94) & 63.96 & (15.76, 112.17)\\
\textbf{foreign worker} & 19.93 & (4.91, 34.94) & 63.96 & (15.76, 100.00)\\
\textbf{age below 25} & 14.94 & (7.76, 22.13) & 17.29 & (8.97, 25.61)\\
\end{tabularx}
\end{table}
Expand Down
25 changes: 20 additions & 5 deletions themis_ml/metrics.py
@@ -1,13 +1,23 @@
"""Module for Fairness-aware scoring metrics."""

import numpy as np
import scipy

from .checks import check_binary
from math import sqrt
from scipy.stats import t

DEFAULT_CI = 0.975

def mean_differences_ci(y, s, ci=0.975):

def mean_confidence_interval(x, confidence=0.95):
    """Compute the sample mean and a two-sided t-based confidence interval.

    :param array-like x: numeric observations.
    :param float confidence: two-sided confidence level, e.g. 0.95 for a
        95% interval.
    :returns: the sample mean followed by the lower and upper bounds of the
        confidence interval.
    :rtype: tuple(float)
    """
    a = np.asarray(x, dtype=float)
    mu, se = np.mean(a), scipy.stats.sem(a)
    # Use the public ``ppf`` rather than the private ``_ppf``: the public
    # method validates its arguments (yielding nan for an out-of-domain
    # quantile or degrees of freedom) instead of evaluating them blindly.
    me = se * t.ppf((1 + confidence) / 2., len(a) - 1)
    return mu, mu - me, mu + me


def mean_differences_ci(y, s, ci=DEFAULT_CI):
"""Calculate the mean difference and confidence interval.
:param array-like y: shape (n, ) containing binary target variable, where
Expand Down Expand Up @@ -70,7 +80,7 @@ def mean_difference(y, s):
return mean_differences_ci(y, s)


def normalized_mean_difference(y, s, norm_y=None):
def normalized_mean_difference(y, s, norm_y=None, ci=DEFAULT_CI):
"""Compute normalized mean difference in y with respect to s.
Same as the mean difference score, except the score takes into account the
Expand Down Expand Up @@ -103,8 +113,9 @@ def normalized_mean_difference(y, s, norm_y=None):
group.
:param numpy.array|None norm_y: shape (n, ) or None. If provided, this
array is used to compute the normalization factor d_max.
:returns: mean difference between advantaged group and disadvantaged group.
:rtype: float
:returns: mean difference between advantaged group and disadvantaged group
with lower and upper confidence interval bounds
:rtype: tuple(float)
"""
y = check_binary(np.array(y).astype(int))
s = check_binary(np.array(s).astype(int))
Expand All @@ -116,7 +127,11 @@ def normalized_mean_difference(y, s, norm_y=None):
# TODO: Figure out if scaling the CI bounds by d_max makes sense here.
if d_max == 0:
return md
return (md[0] / d_max, md[1] / d_max, md[2] / d_max)
lower_ci = md[1] / d_max
lower_ci = lower_ci if lower_ci > -1 else -1
upper_ci = md[2] / d_max
upper_ci = upper_ci if upper_ci < 1 else 1
return (md[0] / d_max, lower_ci, upper_ci)


def abs_mean_difference_delta(y, pred, s):
Expand Down

0 comments on commit 79cb954

Please sign in to comment.