Skip to content

Commit

Permalink
Add documentation for log classification/regression/ranking metrics (#…
Browse files Browse the repository at this point in the history
…1501)

## Description

Adds and improves documentation in docstring format for
log_regression/log_classification/log_ranking metrics.

- [x] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md)
and the [Code of Conduct](CODE_OF_CONDUCT.md).

---------

Co-authored-by: felipe207 <felipe@whylabs.ai>
  • Loading branch information
FelipeAdachi and felipe207 committed Apr 17, 2024
1 parent 8b2809e commit 747b0c1
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 18 deletions.
95 changes: 77 additions & 18 deletions python/whylogs/api/logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,49 @@ def log_classification_metrics(
Function to track metrics based on validation data.
The user may also pass the attribute names associated with
target, prediction, and/or score.
Parameters
----------
targets : List[Union[str, bool, float, int]]
actual validated values
predictions : List[Union[str, bool, float, int]]
inferred/predicted values
scores : List[float], optional
associated scores for each inference; all values set to 1 if not
passed
data : pd.DataFrame
Dataframe with the data to log.
target_column : str
Column name for the actual validated values.
prediction_column : str
Column name for the predicted values.
score_column : Optional[str], optional
Associated scores for each inference; all values set to 1 if None, by default None
schema : Optional[DatasetSchema], optional
Defines the schema for tracking metrics in whylogs, by default None
log_full_data : bool, optional
Whether to log the complete dataframe or not.
If True, the complete DF will be logged in addition to the classification metrics.
If False, only the calculated classification metrics will be logged.
In a typical production use case, the ground truth might not be available
at the time the remaining data is generated. In order to prevent double profiling the
input features, consider leaving this as False. by default False.
dataset_timestamp : Optional[datetime], optional
dataset's timestamp, by default None
Examples
--------
::
data = {
"product": ["milk", "carrot", "cheese", "broccoli"],
"category": ["dairies", "vegetables", "dairies", "vegetables"],
"output_discount": [0, 0, 1, 1],
"output_prediction": [0, 0, 0, 1],
}
df = pd.DataFrame(data)
results = why.log_classification_metrics(
df,
target_column="output_discount",
prediction_column="output_prediction",
log_full_data=True,
)
"""

perf_column_mapping = {"predictions": prediction_column, "targets": target_column, "scores": score_column}
Expand Down Expand Up @@ -214,19 +248,44 @@ def log_regression_metrics(
log_full_data: bool = False,
dataset_timestamp: Optional[datetime] = None,
) -> ResultSet:
"""
Function to track regression metrics based on validation data.
user may also pass the associated attribute names associated with
target, prediction, and/or score.
"""Function to track regression metrics based on validation data.
User may also pass the attribute names associated with target, prediction, and/or score.
Parameters
----------
targets : List[Union[str, bool, float, int]]
actual validated values
predictions : List[Union[str, bool, float, int]]
inferred/predicted values
scores : List[float], optional
associated scores for each inference; all values set to 1 if not
passed
data : pd.DataFrame
Dataframe with the data to log.
target_column : str
Column name for the target values.
prediction_column : str
Column name for the predicted values.
schema : Optional[DatasetSchema], optional
Defines the schema for tracking metrics in whylogs, by default None
log_full_data : bool, optional
Whether to log the complete dataframe or not.
If True, the complete DF will be logged in addition to the regression metrics.
If False, only the calculated regression metrics will be logged.
In a typical production use case, the ground truth might not be available
at the time the remaining data is generated. In order to prevent double profiling the
input features, consider leaving this as False. by default False.
dataset_timestamp : Optional[datetime], optional
dataset's timestamp, by default None
Returns
-------
ResultSet
Examples
--------
::
import pandas as pd
import whylogs as why
df = pd.DataFrame({"target_temperature": [[10.5, 24.3, 15.6]], "predicted_temperature": [[9.12,26.42,13.12]]})
results = why.log_regression_metrics(df, target_column="target_temperature", prediction_column="predicted_temperature")
"""
perf_column_mapping: Dict[str, Optional[str]] = {"predictions": prediction_column, "targets": target_column}

Expand Down
Empty file.
96 changes: 96 additions & 0 deletions python/whylogs/experimental/api/logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,102 @@ def log_batch_ranking_metrics(
schema: Union[DatasetSchema, None] = None,
log_full_data: bool = False,
) -> ViewResultSet:
"""Log ranking metrics for a batch of data.
Parameters
----------
data : pd.core.frame.DataFrame
Dataframe with the data to log.
prediction_column : Optional[str], optional
Column name for the predicted values. If not provided, the score_column and target_column must be provided, by default None
target_column : Optional[str], optional
Column name for the relevance scores. If not provided, relevance must be encoded within prediction column, by default None
score_column : Optional[str], optional
Column name for the scores. Can either be probabilities, confidence values, or other continuous measures.
If not passed, prediction_column must be passed, by default None
k : Optional[int], optional
Consider the top k ranks for metrics calculation.
If `None`, use all outputs, by default None
convert_non_numeric : bool, optional
Indicates whether prediction/target columns are non-numeric.
If True, prediction/target should be strings, by default False
schema : Union[DatasetSchema, None], optional
Defines the schema for tracking metrics in whylogs, by default None
log_full_data : bool, optional
Whether to log the complete dataframe or not.
If True, the complete DF will be logged in addition to the ranking metrics.
If False, only the calculated ranking metrics will be logged.
In a typical production use case, the ground truth might not be available
at the time the remaining data is generated. In order to prevent double profiling the
input features, consider leaving this as False. by default False
Returns
-------
ViewResultSet
Examples
--------
::
import pandas as pd
from whylogs.experimental.api.logger import log_batch_ranking_metrics
# 1st and 2nd recommended items are relevant - 3rd is not
df = pd.DataFrame({"targets": [[1, 0, 1]], "predictions": [[2,3,1]]})
results = log_batch_ranking_metrics(
data=df,
prediction_column="predictions",
target_column="targets",
k=3,
)
::
non_numerical_df = pd.DataFrame(
{
"raw_predictions": [
["cat", "pig", "elephant"],
["horse", "donkey", "robin"],
],
"raw_targets": [
["cat", "elephant"],
["dog"],
],
}
)
# 1st query:
# Recommended items: [cat, pig, elephant]
# Relevant items: [cat, elephant]
# 2nd query:
# Recommended items: [horse, donkey, robin]
# Relevant items: [dog]
results = log_batch_ranking_metrics(
k=2,
data=non_numerical_df,
prediction_column="raw_predictions",
target_column="raw_targets",
convert_non_numeric=True
)
::
binary_single_df = pd.DataFrame(
{
"raw_predictions": [
[True, False, True], # First recommended item: Relevant, Second: Not relevant, Third: Relevant
[False, False, False], # None of the recommended items are relevant
[True, True, False], # First and second recommended items are relevant
]
}
)
result = log_batch_ranking_metrics(data=binary_single_df, prediction_column="raw_predictions", k=3)
"""
formatted_data = data.copy(deep=True) # TODO: does this have to be deep?

if prediction_column is None:
Expand Down

0 comments on commit 747b0c1

Please sign in to comment.