segmented ranking metrics (#1514)
## Description

Support segmented ranking metrics.

## Changes

- Ranking metrics now respect segmented dataset schemas (see the usage sketch below)
- Added `sum_gain_k` coverage to the unit tests

Closes #1506
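For reference, a minimal usage sketch (it mirrors the new unit test below; the `foo` segment column and the animal data are illustrative):

```python
import pandas as pd

from whylogs.core.schema import DatasetSchema
from whylogs.core.segmentation_partition import segment_on_column
from whylogs.experimental.api.logger import log_batch_ranking_metrics

# One row per query: a ranked list of predictions plus the relevant target.
df = pd.DataFrame(
    {
        "foo": [1, 2, 3, 4],  # segmentation key
        "raw_predictions": [
            ["cat", "pig", "elephant"],
            ["horse", "donkey", "robin"],
            ["cow", "pig", "giraffe"],
            ["pig", "dolphin", "elephant"],
        ],
        "raw_targets": ["cat", "dog", "pig", "elephant"],
    }
)

# With a segmented schema, the result is a SegmentedResultSet holding one
# profile of ranking metrics per distinct value of "foo".
result = log_batch_ranking_metrics(
    data=df,
    prediction_column="raw_predictions",
    target_column="raw_targets",
    schema=DatasetSchema(segments=segment_on_column("foo")),
    log_full_data=False,  # keep only the computed metric columns per segment
)
```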

- [ ] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md) and the [Code of Conduct](CODE_OF_CONDUCT.md).

---------

Co-authored-by: Jamie Broomall <88007022+jamie256@users.noreply.github.com>
richard-rogers and jamie256 committed May 7, 2024
1 parent 0a2019f commit 6b42a36
Showing 2 changed files with 94 additions and 22 deletions.
67 changes: 56 additions & 11 deletions python/tests/experimental/api/test_logger.py
@@ -1,9 +1,61 @@
from math import isclose

import pytest

from whylogs.api.logger import SegmentedResultSet
from whylogs.core.schema import DatasetSchema
from whylogs.core.segmentation_partition import segment_on_column
from whylogs.core.stubs import pd
from whylogs.experimental.api.logger import log_batch_ranking_metrics


@pytest.mark.parametrize("log_full", [(True), (False)])
def test_log_batch_ranking_metrics_single_simple_segmented(log_full: bool):
    single_df = pd.DataFrame(
        {
            "foo": [1, 2, 3, 4],
            "raw_predictions": [
                ["cat", "pig", "elephant"],
                ["horse", "donkey", "robin"],
                ["cow", "pig", "giraffe"],
                ["pig", "dolphin", "elephant"],
            ],
            "raw_targets": ["cat", "dog", "pig", "elephant"],
        }
    )
    schema = DatasetSchema(segments=segment_on_column("foo"))
    result = log_batch_ranking_metrics(
        data=single_df,
        prediction_column="raw_predictions",
        target_column="raw_targets",
        schema=schema,
        log_full_data=log_full,
    )
    assert isinstance(result, SegmentedResultSet)

    column_names = [
        "reciprocal_rank",
        "precision_k_3",
        "recall_k_3",
        "top_rank",
        "average_precision_k_3",
        "norm_dis_cumul_gain_k_3",
        "sum_gain_k_3",
    ]
    for profile in result.get_writables():
        pandas_summary = profile.profile_view.to_pandas()
        for col in column_names:
            assert col in pandas_summary.index
        if log_full:
            assert {"foo", "raw_predictions", "raw_targets"}.issubset(set(profile.profile_view.get_columns().keys()))
            assert len(profile.profile_view.get_columns().keys()) == len(column_names) + 3
        else:
            assert not {"foo", "raw_predictions", "raw_targets"}.issubset(
                set(profile.profile_view.get_columns().keys())
            )
            assert len(profile.profile_view.get_columns().keys()) == len(column_names)


def test_log_batch_ranking_metrics_single_simple():
    single_df = pd.DataFrame(
        {
@@ -24,17 +76,16 @@ def test_log_batch_ranking_metrics_single_simple():
    pandas_summary = result.view().to_pandas()

    column_names = [
        "accuracy_k_3",
        "reciprocal_rank",
        "precision_k_3",
        "recall_k_3",
        "top_rank",
        "average_precision_k_3",
        "norm_dis_cumul_gain_k_3",
        "sum_gain_k_3",
    ]
    for col in column_names:
        assert col in pandas_summary.index
    assert pandas_summary.loc["accuracy_k_3", "counts/n"] == 1
    assert pandas_summary.loc["reciprocal_rank", "counts/n"] == 4
    assert pandas_summary.loc["precision_k_3", "counts/n"] == 4
    assert pandas_summary.loc["recall_k_3", "counts/n"] == 4
@@ -49,7 +100,6 @@ def test_log_batch_ranking_metrics_single_simple():
    assert isclose(pandas_summary.loc["recall_k_3", "distribution/mean"], 1.0, abs_tol=0.00001)
    # rr = [1, 0, 0.5, 0.33333]
    assert isclose(pandas_summary.loc["reciprocal_rank", "distribution/mean"], 0.45833, abs_tol=0.00001)
    assert isclose(pandas_summary.loc["accuracy_k_3", "distribution/mean"], 0.75, abs_tol=0.00001)
    assert isclose(pandas_summary.loc["sum_gain_k_3", "distribution/mean"], 0.75, abs_tol=0.00001)


@@ -63,17 +113,16 @@ def test_log_batch_ranking_metrics_binary_simple():

    k = 2
    column_names = [
        "accuracy_k_" + str(k),
        "reciprocal_rank",
        "precision_k_" + str(k),
        "recall_k_" + str(k),
        "top_rank",
        "average_precision_k_" + str(k),
        "norm_dis_cumul_gain_k_" + str(k),
        "sum_gain_k_" + str(k),
    ]
    for col in column_names:
        assert col in pandas_summary.index
    assert pandas_summary.loc["accuracy_k_" + str(k), "counts/n"] == 1
    assert pandas_summary.loc["reciprocal_rank", "counts/n"] == 4
    assert pandas_summary.loc["precision_k_" + str(k), "counts/n"] == 4
    assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
@@ -88,7 +137,6 @@ def test_log_batch_ranking_metrics_binary_simple():
    assert isclose(pandas_summary.loc["recall_k_" + str(k), "distribution/mean"], 0.83333, abs_tol=0.00001)
    # rr = [1, 0, 1, 0.5]
    assert isclose(pandas_summary.loc["reciprocal_rank", "distribution/mean"], 0.625, abs_tol=0.00001)
    assert isclose(pandas_summary.loc["accuracy_k_2", "distribution/mean"], 0.75, abs_tol=0.00001)
    assert isclose(pandas_summary.loc["sum_gain_k_2", "distribution/mean"], 1.0, abs_tol=0.00001)


@@ -115,17 +163,16 @@ def test_log_batch_ranking_metrics_multiple_simple():
    pandas_summary = result.view().to_pandas()

    column_names = [
        "accuracy_k_" + str(k),
        "reciprocal_rank",
        "precision_k_" + str(k),
        "recall_k_" + str(k),
        "top_rank",
        "average_precision_k_" + str(k),
        "norm_dis_cumul_gain_k_" + str(k),
        "sum_gain_k_" + str(k),
    ]
    for col in column_names:
        assert col in pandas_summary.index
    assert pandas_summary.loc["accuracy_k_" + str(k), "counts/n"] == 1
    assert pandas_summary.loc["reciprocal_rank", "counts/n"] == 4
    assert pandas_summary.loc["precision_k_" + str(k), "counts/n"] == 4
    assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
@@ -147,17 +194,16 @@ def test_log_batch_ranking_metrics_default_target():

    k = 3
    column_names = [
        "accuracy_k_" + str(k),
        "reciprocal_rank",
        "precision_k_" + str(k),
        "recall_k_" + str(k),
        "top_rank",
        "average_precision_k_" + str(k),
        "norm_dis_cumul_gain_k_" + str(k),
        "sum_gain_k_" + str(k),
    ]
    for col in column_names:
        assert col in pandas_summary.index
    assert pandas_summary.loc["accuracy_k_" + str(k), "counts/n"] == 1
    assert pandas_summary.loc["reciprocal_rank", "counts/n"] == 1
    assert pandas_summary.loc["precision_k_" + str(k), "counts/n"] == 1
    assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 1
@@ -168,7 +214,6 @@ def test_log_batch_ranking_metrics_default_target():
    assert isclose(pandas_summary.loc[f"norm_dis_cumul_gain_k_{k}", "distribution/median"], 0.90130, abs_tol=0.00001)
    # AP assumes binary relevance - this case doesn't raise an error, just a warning, but the result is not meaningful
    assert isclose(pandas_summary.loc["average_precision_k_" + str(k), "distribution/mean"], 1.00000, abs_tol=0.00001)
    assert isclose(pandas_summary.loc["accuracy_k_3", "distribution/mean"], 1.0, abs_tol=0.00001)
    assert isclose(pandas_summary.loc["sum_gain_k_3", "distribution/mean"], 8.0, abs_tol=0.00001)


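Continuing the sketch from the description, the per-segment profiles can be inspected the way the segmented test above does (`result` is the `SegmentedResultSet` from that sketch):

```python
# Each writable corresponds to one segment; its profile view lists the
# columns that were profiled for that segment.
for profile in result.get_writables():
    columns = sorted(profile.profile_view.get_columns().keys())
    print(columns)
    # With log_full_data=False, only metric columns remain, e.g.
    # "reciprocal_rank", "precision_k_3", ..., "sum_gain_k_3".
```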
49 changes: 38 additions & 11 deletions python/whylogs/experimental/api/logger/__init__.py
@@ -1,9 +1,9 @@
import logging
import math
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Set, Tuple, Union

from whylogs.api.logger import log
from whylogs.api.logger.result_set import ViewResultSet
from whylogs.api.logger.result_set import SegmentedResultSet, ViewResultSet
from whylogs.core import DatasetSchema
from whylogs.core.stubs import np, pd

@@ -135,6 +135,26 @@ def _all_strings(data: pd.Series) -> bool:
    return all([all([isinstance(y, str) for y in x]) for x in data])


def _get_segment_columns(schema: DatasetSchema, data: pd.DataFrame) -> List[str]:
    columns: Set[str] = set()
    for partition_name, partition in schema.segments.items():
        if partition.filter:
            raise ValueError("Filters are not supported for segmented ranking metrics")  # Filters are deprecated
        if partition.mapper:
            columns = columns.union(set(partition.mapper.col_names))

    return list(columns)


def _drop_non_output_columns(result: SegmentedResultSet, keep_columns: Set[str]) -> SegmentedResultSet:
    for partition in result._segments.values():
        for segment in partition.values():
            for column in {column for column in segment._columns.keys() if column not in keep_columns}:
                segment._columns.pop(column)

    return result


def log_batch_ranking_metrics(
    data: pd.core.frame.DataFrame,
    prediction_column: Optional[str] = None,
@@ -330,18 +350,25 @@ def log_batch_ranking_metrics(
        row_wise_functions.calculate_row_ndcg, args=(k,), axis=1
    )
    output_data[f"sum_gain_k_{k}"] = formatted_data.apply(row_wise_functions.sum_gains, args=(k,), axis=1)
    hit_ratio = formatted_data["count_at_k"].apply(lambda x: bool(x)).sum() / len(formatted_data)
    mrr = (1 / formatted_data["top_rank"]).replace([np.inf, np.nan], 0)
    output_data["reciprocal_rank"] = mrr

    if schema and schema.segments:
        original_columns = set(data.columns)
        for column in set(formatted_data.columns):
            if column not in original_columns:
                formatted_data = formatted_data.drop(column, axis=1)

        if log_full_data:
            return log(pandas=pd.concat([formatted_data, output_data], axis=1), schema=schema)
        else:
            segment_columns = _get_segment_columns(schema, formatted_data)
            segmentable_data = formatted_data[segment_columns]
            result = log(pandas=pd.concat([segmentable_data, output_data], axis=1), schema=schema)
            result = _drop_non_output_columns(result, set(output_data.columns))
            return result

    result = log(pandas=output_data, schema=schema)
    result = result.merge(
        log(
            row={
                "accuracy" + ("_k_" + str(k) if k else ""): hit_ratio,
            },
            schema=schema,
        )
    )
    if log_full_data:
        result = result.merge(log(pandas=data, schema=schema))
    return result
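To make the helper behavior concrete, a small hedged example (`_get_segment_columns` is a private helper from the diff above, imported here only for illustration):

```python
import pandas as pd

from whylogs.core import DatasetSchema
from whylogs.core.segmentation_partition import segment_on_column
from whylogs.experimental.api.logger import _get_segment_columns

df = pd.DataFrame({"foo": [1, 2], "raw_targets": ["cat", "dog"]})
schema = DatasetSchema(segments=segment_on_column("foo"))

# Each partition's mapper names the columns segmentation depends on, so only
# ["foo"] needs to be logged alongside the metric columns; the segment keys
# are then stripped from the profiles by _drop_non_output_columns().
print(_get_segment_columns(schema, df))  # expected: ["foo"]
```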
