Fixing metric names in plots (#163)
* Changed metric names from acronyms to actual names

* Metric name cleanup in flow

* Reverted changes in aequitas plots

* Update previous plots to support new naming scheme

* Remove dictionary from bubble concatenation chart

* Change reference to aequitas flow

* Remove duplicated argument

---------

Co-authored-by: Sérgio Jesus <sergio.jesus@feedzai.com>
reluzita and sgpjesus committed Feb 12, 2024
1 parent 59c1a02 commit 8da3f6b
Showing 11 changed files with 239 additions and 161 deletions.
25 changes: 6 additions & 19 deletions src/aequitas/flow/plots/bootstrap/plot.py
@@ -1,8 +1,9 @@
-from typing import Literal, Optional
+from typing import Optional
 
 import numpy as np
 
 from ...utils.evaluation import bootstrap_hyperparameters
+from ...utils.metrics import METRIC_NAMES, FAIRNESS_METRIC, PERFORMANCE_METRIC
 from ...evaluation import Result
 
 
@@ -14,27 +15,13 @@
 }
 
 
-metrics = {
-    "Predictive Equality": "fpr_ratio",
-    "Equal Opportunity": "tpr_ratio",
-    "Demographic Parity": "pprev_ratio",
-    "TPR": "tpr",
-    "FPR": "fpr",
-    "FNR": "fnr",
-    "Accuracy": "accuracy",
-    "Precision": "precision",
-}
-
-
 class Plot:
     def __init__(
         self,
         results: dict[str, dict[str, Result]],
         dataset: str,
-        fairness_metric: Literal[
-            "Predictive Equality", "Equal Opportunity", "Demographic Parity"
-        ],
-        performance_metric: Literal["TPR", "FPR", "FNR", "Accuracy", "Precision"],
+        fairness_metric: FAIRNESS_METRIC,
+        performance_metric: PERFORMANCE_METRIC,
         method: Optional[str] = None,
         confidence_intervals: float = 0.95,
         **kwargs,
@@ -64,8 +51,8 @@ def __init__(
         for key, value in DEFAULT_KWARGS.items():
             if key not in self.kwargs:
                 self.kwargs[key] = value
-        self.kwargs["fairness_metric"] = metrics[fairness_metric]
-        self.kwargs["performance_metric"] = metrics[performance_metric]
+        self.kwargs["fairness_metric"] = METRIC_NAMES[fairness_metric]
+        self.kwargs["performance_metric"] = METRIC_NAMES[performance_metric]
         self.bootstrap_results = {}
         if isinstance(self.kwargs["alpha_points"], np.ndarray):
             self.x = self.kwargs["alpha_points"]
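A minimal usage sketch of the new call style (the results object, dataset name, and import path are assumed for illustration, not taken from this diff):

# Hypothetical usage of the bootstrap Plot with display-style metric names.
from aequitas.flow.plots.bootstrap.plot import Plot

plot = Plot(
    results=results,                        # dict[str, dict[str, Result]] from an experiment run (assumed)
    dataset="folktables",                   # assumed dataset name
    fairness_metric="Predictive Equality",  # mapped to "fpr_ratio" via METRIC_NAMES
    performance_metric="Accuracy",          # mapped to "accuracy" via METRIC_NAMES
)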
16 changes: 3 additions & 13 deletions src/aequitas/flow/plots/bootstrap/visualize.py
@@ -4,6 +4,7 @@
 
 
 from .plot import Plot
+from ...utils.metrics import METRIC_NAMES
 
 sns.set()
 sns.set_style("whitegrid", {"grid.linestyle": "--"})
@@ -53,22 +54,11 @@
     "grid_search_folktables",
 ]
 
-metrics_names = {
-    "Predictive Equality": "Pred. Eq.",
-    "Equal Opportunity": "Eq. Opp.",
-    "Demographic Parity": "Dem. Par.",
-    "TPR": "TPR",
-    "FPR": "FPR",
-    "FNR": "FNR",
-    "Accuracy": "Acc.",
-    "Precision": "Prec.",
-}
-
 
 def visualize(plot: Plot):
     # define the name of the metrics for plot
-    perf_metric_plot = metrics_names[plot.performance_metric]
-    fair_metric_plot = metrics_names[plot.fairness_metric]
+    perf_metric_plot = METRIC_NAMES[plot.performance_metric]
+    fair_metric_plot = METRIC_NAMES[plot.fairness_metric]
 
     x = plot.x
 
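The local abbreviation table is gone; labels are now looked up from the shared mapping, whose full contents appear in src/aequitas/flow/utils/metrics.py below. For example:

from aequitas.flow.utils.metrics import METRIC_NAMES

METRIC_NAMES["Equal Opportunity"]  # -> "tpr_ratio"
METRIC_NAMES["Accuracy"]           # -> "accuracy"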
31 changes: 10 additions & 21 deletions src/aequitas/flow/plots/pareto/plot.py
@@ -8,6 +8,7 @@
 from ....bias import Bias
 from ....group import Group
 from ....plot import summary, disparity
+from ...utils.metrics import FAIRNESS_METRIC, PERFORMANCE_METRIC
 
 
 _names = {
@@ -54,10 +55,12 @@ class Plot:
         Name of the dataset to be used in the Pareto plot.
     method : union[str, list], optional
         Name of the method to plot. If none, all methods will be plotted.
-    fairness_metric : {"Predictive Equality", "Equal Opportunity", "Demographic Parity"}
-        The default fairness metric to use in the Pareto plot.
-    performance_metric : {"TPR", "FPR", "FNR", "Accuracy", "Precision"}
-        The default performance metric to use in the Pareto plot.
+    fairness_metric : str
+        The default fairness metric to use in the Pareto plot. Possible values
+        are defined in aequitas.flow.utils.metrics
+    performance_metric : str
+        The default performance metric to use in the Pareto plot. Possible values
+        are defined in aequitas.flow.utils.metrics
     alpha : float, optional
         The alpha value to use in the Pareto plot.
     direction : {"minimize", "maximize"}, optional
@@ -68,10 +71,8 @@ def __init__(
         self,
         results: dict[str, dict[str, Result]],
         dataset: str,
-        fairness_metric: Literal[
-            "Predictive Equality", "Equal Opportunity", "Demographic Parity"
-        ],
-        performance_metric: Literal["TPR", "FPR", "FNR", "Accuracy", "Precision"],
+        fairness_metric: FAIRNESS_METRIC,
+        performance_metric: PERFORMANCE_METRIC,
         method: Optional[Union[str, list]] = None,
         alpha: float = 0.5,
         direction: Literal["minimize", "maximize"] = "maximize",
@@ -109,18 +110,6 @@ def __init__(
         self.performance_metric = performance_metric
         self.alpha = alpha
         self.direction = direction
-        self.available_fairness_metrics = {
-            "Predictive Equality",
-            "Equal Opportunity",
-            "Demographic Parity",
-        }  # Hardcoded for now
-        self.available_performance_metrics = [
-            "TPR",
-            "FPR",
-            "FNR",
-            "Accuracy",
-            "Precision",
-        ]
         self._best_model_idx: int = 0
 
     @property
@@ -302,7 +291,7 @@ def disparities(
         model_id: int,
         dataset: Any,
         sensitive_attribute: Union[str, list[str]],
-        metrics: list[str] = ["tpr", "fpr"],
+        metrics: list[str] = ["TPR", "FPR"],
         fairness_threshold: float = 1.2,
         results_path: Union[Path, str] = "examples/experiment_results",
         reference_groups: Optional[dict[str, str]] = None,
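With the hardcoded available_fairness_metrics and available_performance_metrics containers removed, the allowed values can still be recovered at runtime from the new Literal aliases — a sketch, not code from this commit:

from typing import get_args

from aequitas.flow.utils.metrics import FAIRNESS_METRIC, PERFORMANCE_METRIC

get_args(FAIRNESS_METRIC)
# ('Predictive Equality', 'Equal Opportunity', 'Demographic Parity')
get_args(PERFORMANCE_METRIC)
# ('TPR', 'FPR', 'FNR', 'Accuracy', 'Precision')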
12 changes: 5 additions & 7 deletions src/aequitas/flow/plots/pareto/visualize.py
@@ -8,6 +8,7 @@
 import pkg_resources
 
 from .plot import Plot
+from ...utils.metrics import FAIRNESS_METRICS, PERFORMANCE_METRICS
 
 
 # NumPy data types are not JSON serializable. This custom JSON encoder will
@@ -90,12 +91,9 @@ def visualize(wrapper: Plot, mode="display", save_path=None, pareto_only=False):
     if pareto_only:
         wrapper_results_flat = wrapper_results_flat[wrapper_results_flat["is_pareto"]]
 
-    fairness_metrics = list(wrapper.available_fairness_metrics)
-    performance_metrics = list(wrapper.available_performance_metrics)
-
     filtered_results = wrapper_results_flat[
-        fairness_metrics
-        + performance_metrics
+        FAIRNESS_METRICS
+        + PERFORMANCE_METRICS
         + ["model_id", "hyperparams", "is_pareto"]
     ]
 
@@ -108,8 +106,8 @@ def visualize(wrapper: Plot, mode="display", save_path=None, pareto_only=False):
         "recommended_model": wrapper.best_model_details,
         "optimized_fairness_metric": wrapper.fairness_metric,
         "optimized_performance_metric": wrapper.performance_metric,
-        "fairness_metrics": fairness_metrics,
-        "performance_metrics": performance_metrics,
+        "fairness_metrics": FAIRNESS_METRICS,
+        "performance_metrics": PERFORMANCE_METRICS,
         "tuner_type": "Random Search",  # Hardcoded for now
         "alpha": wrapper.alpha,
     }
6 changes: 3 additions & 3 deletions src/aequitas/flow/utils/colab.py
@@ -14,19 +14,19 @@ def get_examples(
         "methods/data_repair",
     ]
 ) -> None:
-    """Downloads the examples from the fairflow repository.
+    """Downloads the examples from the aequitas flow repository.
 
     Note that this should not be used outside Google Colab, as it clutters the directory
     with with the git files from Aequitas repository.
 
     Parameters
     ----------
     directory : Literal["configs", "examples/data_repair", "experiment_results"]
-        The directory to download from the fairflow repository.
+        The directory to download from the aequitas flow repository.
     """
     directory = "examples/" + directory
     logger = create_logger("utils.colab")
-    logger.info("Downloading examples from fairflow repository.")
+    logger.info("Downloading examples from aequitas flow repository.")
     # Create directory if it doesn't exist
     Path(directory).mkdir(parents=True, exist_ok=True)
     # Check if git repository already exists in folder
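A usage sketch for the renamed helper (Colab-only, as the docstring warns; the directory value is one of the literals shown in the signature):

from aequitas.flow.utils.colab import get_examples

# The helper prepends "examples/" to the directory before downloading.
get_examples("methods/data_repair")  # downloads into examples/methods/data_repair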
20 changes: 20 additions & 0 deletions src/aequitas/flow/utils/metrics.py
@@ -0,0 +1,20 @@
+from typing import Literal
+
+FAIRNESS_METRICS = ["Predictive Equality", "Equal Opportunity", "Demographic Parity"]
+PERFORMANCE_METRICS = ["TPR", "FPR", "FNR", "Accuracy", "Precision"]
+
+METRIC_NAMES = {
+    "Predictive Equality": "fpr_ratio",
+    "Equal Opportunity": "tpr_ratio",
+    "Demographic Parity": "pprev_ratio",
+    "TPR": "tpr",
+    "FPR": "fpr",
+    "FNR": "fnr",
+    "Accuracy": "accuracy",
+    "Precision": "precision",
+}
+
+FAIRNESS_METRIC = Literal[
+    "Predictive Equality", "Equal Opportunity", "Demographic Parity"
+]
+PERFORMANCE_METRIC = Literal["TPR", "FPR", "FNR", "Accuracy", "Precision"]
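A hedged sketch of how the new module's pieces fit together: the list constants support membership checks and column selection, the Literal aliases drive static checking, and METRIC_NAMES translates display names into aequitas column names. The helper below is illustrative, not part of the commit:

from aequitas.flow.utils.metrics import (
    FAIRNESS_METRIC,
    FAIRNESS_METRICS,
    METRIC_NAMES,
)


def fairness_column(metric: FAIRNESS_METRIC) -> str:
    """Translate a fairness metric display name into its aequitas column name."""
    if metric not in FAIRNESS_METRICS:  # runtime guard mirroring the Literal
        raise ValueError(f"Unknown fairness metric: {metric}")
    return METRIC_NAMES[metric]


fairness_column("Demographic Parity")  # -> "pprev_ratio"
# fairness_column("Parity") is rejected by mypy/pyright and raises ValueError at runtime.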
26 changes: 16 additions & 10 deletions src/aequitas/plot/bubble_concatenation_chart.py
@@ -12,14 +12,17 @@
 from aequitas.plot.commons.style.sizes import Concat_Chart
 from aequitas.plot.commons import initializers as Initializer
 
-# Altair 2.4.1 requires that all chart receive a dataframe, for charts that don't need it
-# (like most annotations), we pass the following dummy dataframe to reduce the complexity of the resulting vega spec.
+# Altair 2.4.1 requires that all chart receive a dataframe, for charts that don't need
+# it (like most annotations), we pass the following dummy dataframe to reduce the
+# complexity of the resulting vega spec.
 DUMMY_DF = pd.DataFrame({"a": [1, 1], "b": [0, 0]})
 
 
 def __get_chart_sizes(chart_width):
-    """Calculates the widths of the disparity and metric charts that make-up the concatenated chart.
-    The individual widths are calculated based on the provided desired overall chart width."""
+    """Calculates the widths of the disparity and metric charts that make-up the
+    concatenated chart. The individual widths are calculated based on the provided
+    desired overall chart width.
+    """
 
     chart_sizes = dict(
         disparity_chart_width=0.5 * chart_width, metric_chart_width=0.5 * chart_width
@@ -59,7 +62,8 @@ def plot_concatenated_bubble_charts(
     chart_width=Concat_Chart.full_width,
     accessibility_mode=False,
 ):
-    """Draws a concatenation of the disparity bubble chart and the metric values bubble chart,
+    """Draws a concatenation of the disparity bubble chart and the metric values bubble
+    chart,
     of the selected metrics for a given attribute.
 
     :param disparity_df: a dataframe generated by the Aequitas Bias class
@@ -68,19 +72,20 @@
     :type metrics_list: list
     :param attribute: an attribute to plot
     :type attribute: str
-    :param fairness_threshold: a value for the maximum allowed disparity, defaults to 1.25
+    :param fairness_threshold: a value for the maximum allowed disparity, defaults to
+        1.25
     :type fairness_threshold: float, optional
     :param chart_height: a value (in pixels) for the height of the chart
    :type chart_height: int, optional
     :param chart_width: a value (in pixels) for the width of the chart
     :type chart_width: int, optional
-    :param accessibility_mode: a switch for the display of more accessible visual elements, defaults to False
+    :param accessibility_mode: a switch for the display of more accessible visual
+        elements, defaults to False
     :type accessibility_mode: bool, optional
     :return: the full disparities chart
     :rtype: Altair chart object
     """
-
     (
         plot_table,
         metrics,
@@ -170,10 +175,11 @@
             offset=Chart_Title.offset,
         )
         .properties(
-            title=attribute.title(), 
+            title=attribute.title(),
             padding={
                 "top": Concat_Chart.full_chart_padding,
-                "bottom": -FONT_SIZE_SMALL * 0.75/3 * len(metrics_list) + Concat_Chart.full_chart_padding,
+                "bottom": -FONT_SIZE_SMALL * 0.75 / 3 * len(metrics_list)
+                + Concat_Chart.full_chart_padding,
                 "left": Concat_Chart.full_chart_padding,
                 "right": Concat_Chart.full_chart_padding,
             },
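Worked arithmetic for the reflowed bottom-padding expression (the constant values below are assumed for illustration; the real ones come from the module's style imports):

FONT_SIZE_SMALL = 12     # assumed stand-in for the module's style constant
full_chart_padding = 20  # assumed stand-in for Concat_Chart.full_chart_padding
metrics_list = ["fpr", "tpr", "pprev"]

bottom = -FONT_SIZE_SMALL * 0.75 / 3 * len(metrics_list) + full_chart_padding
# -12 * 0.75 / 3 * 3 + 20 = -9.0 + 20 = 11.0 pixels of bottom padding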
