ENH: Add confusion_matrix_statistics function in contingency_tables #9118

Open · wants to merge 5 commits into main
129 changes: 129 additions & 0 deletions statsmodels/stats/contingency_tables.py
@@ -26,6 +26,7 @@
"""

import warnings
from typing import Tuple

import numpy as np
import pandas as pd
@@ -1429,3 +1430,131 @@
return b

return q_stat, pvalue, df


def confusion_matrix_statistics(
table, actual_in_column=True, positive=None
) -> Tuple[pd.Series, pd.DataFrame]:
"""
Calculate various performance metrics based on a confusion matrix.

    Parameters
    ----------
    table : ndarray, DataFrame, or square table
        Confusion matrix representing the classification results.
    actual_in_column : bool, default=True
        If True, the actual values are assumed to be in the columns of the
        confusion matrix, i.e. each column corresponds to an actual class.
        If False, the actual values are assumed to be in the rows.
    positive : {0, 1}, optional
        Only applicable for binary classification. If positive is None
        (the default) or 0, the first row/column is treated as the positive
        class; if positive is 1, the second row/column is treated as the
        positive class.

Returns
-------
overall_stats : Series
A Series containing overall performance metrics.
class_stats : DataFrame
A DataFrame containing class-wise performance metrics.

Notes
-----
- This function calculates various performance metrics, including accuracy,
no information rate, kappa, sensitivity, specificity, positive predictive
value, negative predictive value, balanced accuracy, and F1 score.
- The function supports binary and multiclass classification.
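    - For each class (one-vs-rest), with TP, FP, FN and TN denoting true
      positives, false positives, false negatives and true negatives:
      sensitivity = TP / (TP + FN), specificity = TN / (TN + FP),
      positive predictive value = TP / (TP + FP),
      negative predictive value = TN / (TN + FN),
      balanced accuracy = (sensitivity + specificity) / 2 and
      F1 = 2 / (1 / sensitivity + 1 / positive predictive value).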
"""
if isinstance(table, Table):
table = table.table_orig

num_levels = table.shape[0]

if isinstance(table, pd.DataFrame) and not np.array_equal(
table.index, table.columns
):
raise ValueError(

"The table must have the same classes in the same order"
)

if isinstance(table, pd.DataFrame):
class_levels = table.index

else:
class_levels = [f"class {i + 1}" for i in range(num_levels)]
table = pd.DataFrame(table, index=class_levels, columns=class_levels)

if positive is not None and positive not in (0, 1):
raise ValueError("Positive argument must be 0 or 1")

if num_levels < 2:
raise ValueError("There must be at least 2 factors levels in the data")

if not actual_in_column:
table = table.transpose()

    # For binary classification with the first class as positive, the table
    # looks like
    #     TP  FP
    #     FN  TN

correct = np.trace(table)
row_sum = table.sum(axis=1)
col_sum = table.sum(axis=0)
total = np.sum(row_sum)
expected = row_sum.dot(col_sum) / total

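    # No Information Rate: the accuracy obtained by always predicting the
    # most frequent actual class, i.e. max(col_sum) / total.
    # Cohen's kappa: (p_o - p_e) / (1 - p_e), where p_o = correct / total and
    # p_e = expected / total; multiplying numerator and denominator by total
    # gives the equivalent form (correct - expected) / (total - expected).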
overall_stats = pd.Series(
{
"Accuracy": correct / total,
"No Information Rate": max(col_sum) / total,
"Kappa": (correct - expected) / (total - expected),
}
)

metric_names = [
"Sensitivity",
"Specificity",
"Pos Pred Value",
"Neg Pred Value",
"Balanced Accuracy",
"F1",
]

class_stats = pd.DataFrame(
index=(class_levels if num_levels > 2 else ["prediction"]),
columns=metric_names,
dtype=np.float64,
)

    # For binary classification, the loop ends after the first iteration
for i in range(num_levels if num_levels > 2 else 1):
# Handle binary classification scenario based on the positive argument
if num_levels == 2 and positive == 1:
TP = table.iloc[1, 1]
FN = table.iloc[0, 1]
FP = table.iloc[1, 0]
TN = table.iloc[0, 0]
else:
TP = table.iloc[i, i]
FN = col_sum.iloc[i] - TP
FP = row_sum.iloc[i] - TP
TN = total - (TP + FN + FP)

# Sensitivity
class_stats.iloc[i, 0] = TP / (TP + FN)
# Specificity
class_stats.iloc[i, 1] = TN / (TN + FP)
# Positive predictive value
class_stats.iloc[i, 2] = TP / (TP + FP)
# Negative predictive value
class_stats.iloc[i, 3] = TN / (TN + FN)
# Balanced Accuracy
class_stats.iloc[i, 4] = (
class_stats.iloc[i, 0] + class_stats.iloc[i, 1]
) / 2
        # F1 score: harmonic mean of sensitivity (recall) and
        # positive predictive value (precision)
class_stats.iloc[i, 5] = 2 / (
1 / class_stats.iloc[i, 0] + 1 / class_stats.iloc[i, 2]
)

return overall_stats, class_stats
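
A minimal usage sketch of the new function (assuming this PR branch is installed; the 2x2 matrix below is made up for illustration, and the commented values follow directly from the formulas above):

import numpy as np

from statsmodels.stats.contingency_tables import confusion_matrix_statistics

# Rows are predictions, columns are actual classes (actual_in_column=True).
cm = np.array([[5, 2],
               [1, 4]])
overall, by_class = confusion_matrix_statistics(cm)

# Accuracy = (5 + 4) / 12 = 0.75, No Information Rate = 6 / 12 = 0.5,
# Kappa = (9 - 6) / (12 - 6) = 0.5.
print(overall)

# With the first class treated as positive: Sensitivity = 5 / 6,
# Specificity = 4 / 6, Pos Pred Value = 5 / 7, Neg Pred Value = 4 / 5.
print(by_class)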
43 changes: 43 additions & 0 deletions statsmodels/stats/tests/test_contingency_tables.py
@@ -230,6 +230,49 @@ def test_mcnemar():
b4 = ctab.mcnemar(tables[0], exact=True)
assert_allclose(b4.pvalue, r_results.loc[0, "homog_binom_p"])

def test_confusion_matrix_statistics_2_categories():
overall_stats, class_stats = ctab.confusion_matrix_statistics(tables[0], positive=1)

expected_overall_stats = pd.Series({
'Accuracy': 0.6136,
'No Information Rate': 0.5227,
'Kappa': 0.2225
})
assert_allclose(overall_stats, expected_overall_stats, atol=1e-4)

expected_class_stats = pd.DataFrame(
{'Sensitivity': {'prediction': 0.6739},
'Specificity': {'prediction': 0.5476},
'Pos Pred Value': {'prediction': 0.6200},
'Neg Pred Value': {'prediction': 0.6053},
'Balanced Accuracy': {'prediction': 0.6108},
'F1': {'prediction': 0.6458}
})
assert_allclose(class_stats, expected_class_stats, atol=1e-4)


def test_confusion_matrix_statistics_4_categories():
overall_stats, class_stats = ctab.confusion_matrix_statistics(tables[1])

expected_overall_stats = pd.Series({
'Accuracy': 0.3347,
'No Information Rate': 0.3895,
'Kappa': 0.0771
})
assert_allclose(overall_stats, expected_overall_stats, atol=1e-4)

expected_class_stats = pd.DataFrame(
{'Sensitivity': [0.9863, 0.102564, 0.05714, 0.02703],
'Specificity': [0.2614, 0.896789, 0.92703, 0.99655],
'Pos Pred Value': [0.3721, 0.081633, 0.18182, 0.83333],
'Neg Pred Value': [0.9773, 0.917840, 0.77602, 0.61620],
'Balanced Accuracy': [0.6238, 0.499677, 0.49208, 0.51179],
'F1': [0.5403, 0.09091, 0.0870, 0.0524]}
)
expected_class_stats.index = [f'class {i + 1}' for i in range(4)]
assert_allclose(class_stats, expected_class_stats, atol=1e-4)


def test_from_data_stratified():

df = pd.DataFrame([[1, 1, 1, 0, 1, 0, 1, 0], [0, 1, 0, 1, 0, 1, 0, 0],