ENH: Add confusion_matrix_statistics function in contingency_tables #9118

Open · wants to merge 5 commits into main
129 changes: 129 additions & 0 deletions statsmodels/stats/contingency_tables.py
@@ -26,6 +26,7 @@
"""

import warnings
from typing import Tuple

import numpy as np
import pandas as pd
@@ -1429,3 +1430,131 @@
return b

return q_stat, pvalue, df


def confusion_matrix_statistics(
table, actual_in_column=True, positive=None
) -> Tuple[pd.Series, pd.DataFrame]:
"""
Calculate various performance metrics based on a confusion matrix.

    Parameters
    ----------
    table : ndarray, DataFrame, or square table
        Confusion matrix representing the classification results.
    actual_in_column : bool, default=True
        If True, the actual values are assumed to be in the columns of the
        confusion matrix, i.e. each column corresponds to an actual class.
        If False, the actual values are assumed to be in the rows.
    positive : {0, 1}, optional
        Only applicable for binary classification. If positive is None
        (the default) or 0, the first row/column is treated as the positive
        class; if positive is 1, the second row/column is treated as the
        positive class.

Returns
-------
overall_stats : Series
A Series containing overall performance metrics.
class_stats : DataFrame
A DataFrame containing class-wise performance metrics.

Notes
-----
- This function calculates various performance metrics, including accuracy,
no information rate, kappa, sensitivity, specificity, positive predictive
value, negative predictive value, balanced accuracy, and F1 score.
- The function supports binary and multiclass classification.
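    - For each class (one-vs-rest), with TP, FP, FN and TN denoting true
      positives, false positives, false negatives and true negatives:
      sensitivity = TP / (TP + FN), specificity = TN / (TN + FP),
      positive predictive value = TP / (TP + FP),
      negative predictive value = TN / (TN + FN),
      balanced accuracy = (sensitivity + specificity) / 2 and
      F1 = 2 / (1 / sensitivity + 1 / positive predictive value).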
"""
if isinstance(table, Table):
table = table.table_orig

num_levels = table.shape[0]

if isinstance(table, pd.DataFrame) and not np.array_equal(
table.index, table.columns
):
raise ValueError(

"The table must have the same classes in the same order"
)

if isinstance(table, pd.DataFrame):
class_levels = table.index

else:
class_levels = [f"class {i + 1}" for i in range(num_levels)]
table = pd.DataFrame(table, index=class_levels, columns=class_levels)

if positive is not None and positive not in (0, 1):
raise ValueError("Positive argument must be 0 or 1")

if num_levels < 2:
raise ValueError("There must be at least 2 factors levels in the data")

if not actual_in_column:
table = table.transpose()

    # For binary classification with the first class as positive, the table
    # looks like
    #     TP  FP
    #     FN  TN

correct = np.trace(table)
row_sum = table.sum(axis=1)
col_sum = table.sum(axis=0)
total = np.sum(row_sum)
expected = row_sum.dot(col_sum) / total

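    # No Information Rate: the accuracy obtained by always predicting the
    # most frequent actual class, i.e. max(col_sum) / total.
    # Cohen's kappa: (p_o - p_e) / (1 - p_e), where p_o = correct / total and
    # p_e = expected / total; multiplying numerator and denominator by total
    # gives the equivalent form (correct - expected) / (total - expected).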
overall_stats = pd.Series(
{
"Accuracy": correct / total,
"No Information Rate": max(col_sum) / total,
"Kappa": (correct - expected) / (total - expected),
}
)

metric_names = [
"Sensitivity",
"Specificity",
"Pos Pred Value",
"Neg Pred Value",
"Balanced Accuracy",
"F1",
]

class_stats = pd.DataFrame(
index=(class_levels if num_levels > 2 else ["prediction"]),
columns=metric_names,
dtype=np.float64,
)

    # For binary classification, the loop ends after the first iteration
for i in range(num_levels if num_levels > 2 else 1):
# Handle binary classification scenario based on the positive argument
if num_levels == 2 and positive == 1:
TP = table.iloc[1, 1]
FN = table.iloc[0, 1]
FP = table.iloc[1, 0]
TN = table.iloc[0, 0]
else:
TP = table.iloc[i, i]
FN = col_sum.iloc[i] - TP
FP = row_sum.iloc[i] - TP
TN = total - (TP + FN + FP)

# Sensitivity
class_stats.iloc[i, 0] = TP / (TP + FN)
# Specificity
class_stats.iloc[i, 1] = TN / (TN + FP)
# Positive predictive value
class_stats.iloc[i, 2] = TP / (TP + FP)
# Negative predictive value
class_stats.iloc[i, 3] = TN / (TN + FN)
# Balanced Accuracy
class_stats.iloc[i, 4] = (
class_stats.iloc[i, 0] + class_stats.iloc[i, 1]
) / 2
        # F1 score: harmonic mean of sensitivity (recall) and
        # positive predictive value (precision)
class_stats.iloc[i, 5] = 2 / (
1 / class_stats.iloc[i, 0] + 1 / class_stats.iloc[i, 2]
)

return overall_stats, class_stats
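
A minimal usage sketch of the new function (assuming this PR branch is installed; the 2x2 matrix below is made up for illustration, and the commented values follow directly from the formulas above):

import numpy as np

from statsmodels.stats.contingency_tables import confusion_matrix_statistics

# Rows are predictions, columns are actual classes (actual_in_column=True).
cm = np.array([[5, 2],
               [1, 4]])
overall, by_class = confusion_matrix_statistics(cm)

# Accuracy = (5 + 4) / 12 = 0.75, No Information Rate = 6 / 12 = 0.5,
# Kappa = (9 - 6) / (12 - 6) = 0.5.
print(overall)

# With the first class treated as positive: Sensitivity = 5 / 6,
# Specificity = 4 / 6, Pos Pred Value = 5 / 7, Neg Pred Value = 4 / 5.
print(by_class)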
43 changes: 43 additions & 0 deletions statsmodels/stats/tests/test_contingency_tables.py
@@ -230,6 +230,49 @@ def test_mcnemar():
b4 = ctab.mcnemar(tables[0], exact=True)
assert_allclose(b4.pvalue, r_results.loc[0, "homog_binom_p"])

def test_confusion_matrix_statistics_2_categories():
overall_stats, class_stats = ctab.confusion_matrix_statistics(tables[0], positive=1)

expected_overall_stats = pd.Series({
'Accuracy': 0.6136,
'No Information Rate': 0.5227,
'Kappa': 0.2225
})
assert_allclose(overall_stats, expected_overall_stats, atol=1e-4)

expected_class_stats = pd.DataFrame(
{'Sensitivity': {'prediction': 0.6739},
'Specificity': {'prediction': 0.5476},
'Pos Pred Value': {'prediction': 0.6200},
'Neg Pred Value': {'prediction': 0.6053},
'Balanced Accuracy': {'prediction': 0.6108},
'F1': {'prediction': 0.6458}
})
assert_allclose(class_stats, expected_class_stats, atol=1e-4)


def test_confusion_matrix_statistics_4_categories():
overall_stats, class_stats = ctab.confusion_matrix_statistics(tables[1])

expected_overall_stats = pd.Series({
'Accuracy': 0.3347,
'No Information Rate': 0.3895,
'Kappa': 0.0771
})
assert_allclose(overall_stats, expected_overall_stats, atol=1e-4)

expected_class_stats = pd.DataFrame(
{'Sensitivity': [0.9863, 0.102564, 0.05714, 0.02703],
'Specificity': [0.2614, 0.896789, 0.92703, 0.99655],
'Pos Pred Value': [0.3721, 0.081633, 0.18182, 0.83333],
'Neg Pred Value': [0.9773, 0.917840, 0.77602, 0.61620],
'Balanced Accuracy': [0.6238, 0.499677, 0.49208, 0.51179],
'F1': [0.5403, 0.09091, 0.0870, 0.0524]}
)
expected_class_stats.index = [f'class {i + 1}' for i in range(4)]
assert_allclose(class_stats, expected_class_stats, atol=1e-4)


def test_from_data_stratified():

df = pd.DataFrame([[1, 1, 1, 0, 1, 0, 1, 0], [0, 1, 0, 1, 0, 1, 0, 0],