Skip to content

Commit

Permalink
Add documentation for log classification/regression/ranking metrics (#…
Browse files Browse the repository at this point in the history
…1501)

## Description

Adds and improves documentation in docstring format for
log_regression/log_classification/log_ranking metrics.

- [x] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md)
and the [Code of Conduct](CODE_OF_CONDUCT.md).

---------

Co-authored-by: felipe207 <felipe@whylabs.ai>
  • Loading branch information
FelipeAdachi and felipe207 committed Apr 17, 2024
1 parent 8b2809e commit 747b0c1
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 18 deletions.
95 changes: 77 additions & 18 deletions python/whylogs/api/logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,49 @@ def log_classification_metrics(
Function to track metrics based on validation data.
The user may also pass the attribute names associated with
target, prediction, and/or score.
Parameters
----------
targets : List[Union[str, bool, float, int]]
actual validated values
predictions : List[Union[str, bool, float, int]]
inferred/predicted values
scores : List[float], optional
associated scores for each inference; all values set to 1 if not
passed
data : pd.DataFrame
Dataframe with the data to log.
target_column : str
Column name for the actual validated values.
prediction_column : str
Column name for the predicted values.
score_column : Optional[str], optional
Associated scores for each inference; all values set to 1 if None, by default None
schema : Optional[DatasetSchema], optional
Defines the schema for tracking metrics in whylogs, by default None
log_full_data : bool, optional
Whether to log the complete dataframe or not.
If True, the complete DF will be logged in addition to the classification metrics.
If False, only the calculated classification metrics will be logged.
In a typical production use case, the ground truth might not be available
at the time the remaining data is generated. In order to prevent double profiling the
input features, consider leaving this as False. by default False.
dataset_timestamp : Optional[datetime], optional
dataset's timestamp, by default None
Examples
--------
::
data = {
"product": ["milk", "carrot", "cheese", "broccoli"],
"category": ["dairies", "vegetables", "dairies", "vegetables"],
"output_discount": [0, 0, 1, 1],
"output_prediction": [0, 0, 0, 1],
}
df = pd.DataFrame(data)
results = why.log_classification_metrics(
df,
target_column="output_discount",
prediction_column="output_prediction",
log_full_data=True,
)
"""

perf_column_mapping = {"predictions": prediction_column, "targets": target_column, "scores": score_column}
Expand Down Expand Up @@ -214,19 +248,44 @@ def log_regression_metrics(
log_full_data: bool = False,
dataset_timestamp: Optional[datetime] = None,
) -> ResultSet:
"""
Function to track regression metrics based on validation data.
user may also pass the associated attribute names associated with
target, prediction, and/or score.
"""Function to track regression metrics based on validation data.
User may also pass the attribute names associated with target, prediction, and/or score.
Parameters
----------
targets : List[Union[str, bool, float, int]]
actual validated values
predictions : List[Union[str, bool, float, int]]
inferred/predicted values
scores : List[float], optional
associated scores for each inference; all values set to 1 if not
passed
data : pd.DataFrame
Dataframe with the data to log.
target_column : str
Column name for the target values.
prediction_column : str
Column name for the predicted values.
schema : Optional[DatasetSchema], optional
Defines the schema for tracking metrics in whylogs, by default None
log_full_data : bool, optional
Whether to log the complete dataframe or not.
If True, the complete DF will be logged in addition to the regression metrics.
If False, only the calculated regression metrics will be logged.
In a typical production use case, the ground truth might not be available
at the time the remaining data is generated. In order to prevent double profiling the
input features, consider leaving this as False. by default False.
dataset_timestamp : Optional[datetime], optional
dataset's timestamp, by default None
Returns
-------
ResultSet
Examples
--------
::
import pandas as pd
import whylogs as why
df = pd.DataFrame({"target_temperature": [[10.5, 24.3, 15.6]], "predicted_temperature": [[9.12,26.42,13.12]]})
results = why.log_regression_metrics(df, target_column="target_temperature", prediction_column="predicted_temperature")
"""
perf_column_mapping: Dict[str, Optional[str]] = {"predictions": prediction_column, "targets": target_column}

Expand Down
Empty file.
96 changes: 96 additions & 0 deletions python/whylogs/experimental/api/logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,102 @@ def log_batch_ranking_metrics(
schema: Union[DatasetSchema, None] = None,
log_full_data: bool = False,
) -> ViewResultSet:
"""Log ranking metrics for a batch of data.
Parameters
----------
data : pd.core.frame.DataFrame
Dataframe with the data to log.
prediction_column : Optional[str], optional
Column name for the predicted values. If not provided, the score_column and target_column must be provided, by default None
target_column : Optional[str], optional
Column name for the relevance scores. If not provided, relevance must be encoded within prediction column, by default None
score_column : Optional[str], optional
Column name for the scores. Can either be probabilities, confidence values, or other continuous measures.
If not passed, prediction_column must be passed, by default None
k : Optional[int], optional
Consider the top k ranks for metrics calculation.
If `None`, use all outputs, by default None
convert_non_numeric : bool, optional
Indicates whether prediction/target columns are non-numeric.
If True, prediction/target should be strings, by default False
schema : Union[DatasetSchema, None], optional
Defines the schema for tracking metrics in whylogs, by default None
log_full_data : bool, optional
Whether to log the complete dataframe or not.
If True, the complete DF will be logged in addition to the ranking metrics.
If False, only the calculated ranking metrics will be logged.
In a typical production use case, the ground truth might not be available
at the time the remaining data is generated. In order to prevent double profiling the
input features, consider leaving this as False. by default False
Returns
-------
ViewResultSet
Examples
--------
::
import pandas as pd
from whylogs.experimental.api.logger import log_batch_ranking_metrics
# 1st and 2nd recommended items are relevant - 3rd is not
df = pd.DataFrame({"targets": [[1, 0, 1]], "predictions": [[2,3,1]]})
results = log_batch_ranking_metrics(
data=df,
prediction_column="predictions",
target_column="targets",
k=3,
)
::
non_numerical_df = pd.DataFrame(
{
"raw_predictions": [
["cat", "pig", "elephant"],
["horse", "donkey", "robin"],
],
"raw_targets": [
["cat", "elephant"],
["dog"],
],
}
)
# 1st query:
# Recommended items: [cat, pig, elephant]
# Relevant items: [cat, elephant]
# 2nd query:
# Recommended items: [horse, donkey, robin]
# Relevant items: [dog]
results = log_batch_ranking_metrics(
k=2,
data=non_numerical_df,
prediction_column="raw_predictions",
target_column="raw_targets",
convert_non_numeric=True
)
::
binary_single_df = pd.DataFrame(
{
"raw_predictions": [
[True, False, True], # First recommended item: Relevant, Second: Not relevant, Third: Relevant
[False, False, False], # None of the recommended items are relevant
[True, True, False], # First and second recommended items are relevant
]
}
)
result = log_batch_ranking_metrics(data=binary_single_df, prediction_column="raw_predictions", k=3)
"""
formatted_data = data.copy(deep=True) # TODO: does this have to be deep?

if prediction_column is None:
Expand Down

0 comments on commit 747b0c1

Please sign in to comment.