Merge pull request #55 from jarq6c/continuous_metrics

Add MSE and NSE methods to metrics subpackage
NOAA-OWP · Apr 1, 2021 · 9470d0e · 9470d0e
2 parents 062bc63 + b4857be
commit 9470d0e
Show file tree

Hide file tree

Showing 4 changed files with 118 additions and 4 deletions.
diff --git a/python/metrics/evaluation_tools/metrics/metrics.py b/python/metrics/evaluation_tools/metrics/metrics.py
@@ -19,12 +19,98 @@
  - percent_correct
  - base_chance
  - equitable_threat_score
+ - mean_squared_error
+ - nash_sutcliffe_efficiency
 
 """
 
+import numpy as np
+import numpy.typing as npt
 import pandas as pd
 from typing import Union
 
+def mean_squared_error(
+    y_true: npt.ArrayLike,
+    y_pred: npt.ArrayLike,
+    root: bool = False
+    ) -> float:
+    """Compute the mean squared error, or optionally root mean squared error.
+
+    Parameters
+    ----------
+    y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values, also called observations, measurements, or observed values.
+    y_pred: array-like with the same shape as y_true, or broadcastable to it
+        Estimated target values, also called simulations or modeled values.
+    root: bool, default False
+        When True, return the root mean squared error.
+
+    Returns
+    -------
+    error: float
+        Mean squared error or root mean squared error, averaged over every element.
+
+    """
+    # Average over every element; len(y_true) would only count the first axis
+    MSE = np.mean(np.asarray(np.subtract(y_true, y_pred)) ** 2.0)
+
+    # Return MSE, optionally return root mean squared error
+    if root:
+        return np.sqrt(MSE)
+    return MSE
+
+def nash_sutcliffe_efficiency(
+    y_true: npt.ArrayLike,
+    y_pred: npt.ArrayLike,
+    log: bool = False,
+    normalized: bool = False
+    ) -> float:
+    """Compute the Nash–Sutcliffe model efficiency coefficient (NSE), also called the
+    mean squared error skill score or the R^2 (coefficient of determination) regression score.
+
+    Parameters
+    ----------
+    y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values, also called observations, measurements, or observed values.
+    y_pred: array-like with the same shape as y_true
+        Estimated target values, also called simulations or modeled values.
+    log: bool, default False
+        Apply numpy.log (natural logarithm) to y_true and y_pred
+        before computing the NSE.
+    normalized: bool, default False
+        When True, normalize the final NSE value using the method from
+        Nossent & Bauwens, 2012.
+
+    Returns
+    -------
+    score: float
+        Nash–Sutcliffe model efficiency coefficient. The reference error uses
+        the mean taken over all elements of y_true.
+
+    References
+    ----------
+    Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through
+        conceptual models part I—A discussion of principles. Journal of
+        hydrology, 10(3), 282-290.
+    Nossent, J., & Bauwens, W. (2012, April). Application of a normalized
+        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol'
+        sensitivity analysis of a hydrological model. In EGU General Assembly
+        Conference Abstracts (p. 237).
+
+    """
+    # Optionally transform components
+    if log:
+        y_true = np.log(y_true)
+        y_pred = np.log(y_pred)
+
+    # Ratio of the model error to the error of predicting the observed mean
+    reference = mean_squared_error(y_true, np.mean(y_true))
+    error_ratio = mean_squared_error(y_true, y_pred) / reference
+
+    # Compute score, optionally normalize
+    if normalized:
+        return 1.0 / (1.0 + error_ratio)
+    return 1.0 - error_ratio
+
 def compute_contingency_table(
     observed: pd.Series,
     simulated: pd.Series,
@@ -38,9 +124,9 @@ def compute_contingency_table(
     Parameters
     ----------
     observed: pandas.Series, required
-        pandas.Series of boolean pandas.Categorical values indicating observed occurences
+        pandas.Series of boolean pandas.Categorical values indicating observed occurrences
     simulated: pandas.Series, required
-        pandas.Series of boolean pandas.Categorical values indicating simulated occurences
+        pandas.Series of boolean pandas.Categorical values indicating simulated occurrences
     true_positive_key: str, optional, default 'true_positive'
         Label to use for true positives.
     false_positive_key: str, optional, default 'false_positive'

diff --git a/python/metrics/setup.py b/python/metrics/setup.py
@@ -13,7 +13,7 @@
 SUBPACKAGE_SLUG = f"{NAMESPACE_PACKAGE_NAME}.{SUBPACKAGE_NAME}"
 
 # Subpackage version
-VERSION = "0.1.1+1"
+VERSION = "0.1.2+1"
 
 # Package author information
 AUTHOR = "Jason Regina"

diff --git a/python/metrics/tests/test_metrics.py b/python/metrics/tests/test_metrics.py
@@ -3,6 +3,7 @@
 
 import pandas as pd
 from math import isclose
+import numpy as np
 
 contigency_table = {
     'true_positive': 1,
@@ -18,6 +19,9 @@
     'TN': 4
 }
 
+# Shared inputs for the continuous metric tests; y_pred is y_true reversed
+y_true = [1., 2., 3., 4.]
+y_pred = [4., 3., 2., 1.]
+
 def test_compute_contingency_table():
     obs = pd.Categorical([True, False, False, True, True, True,
         False, False, False, False])
@@ -132,3 +136,27 @@ def test_equitable_threat_score():
         true_negative_key='TN'
         )
     assert isclose(ETS, (-0.2/4.8), abs_tol=0.000001)
+
+def test_mean_squared_error():
+    # Squared errors are 9, 1, 1, 9, so the mean is 5; compare with a
+    # tolerance instead of exact float equality
+    MSE = metrics.mean_squared_error(y_true, y_pred)
+    assert isclose(MSE, 5.0, abs_tol=0.000001)
+
+    RMSE = metrics.mean_squared_error(y_true, y_pred, root=True)
+    assert isclose(RMSE, np.sqrt(5.0), abs_tol=0.000001)
+
+def test_nash_sutcliffe_efficiency():
+    # MSE is 5.0 and the mean squared deviation of y_true is 1.25, so the
+    # error ratio is 4: NSE = 1 - 4 = -3 and normalized NSE = 1 / (1 + 4) = 0.2
+    NSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred)
+    assert isclose(NSE, -3.0, abs_tol=0.000001)
+
+    NNSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred,
+        normalized=True)
+    assert isclose(NNSE, 0.2, abs_tol=0.000001)
+
+    # log=True undoes np.exp, so the log-space scores should match the ones
+    # above up to floating-point round-off
+    NSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true),
+        np.exp(y_pred), log=True)
+    assert isclose(NSEL, -3.0, abs_tol=0.000001)
+
+    NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true),
+        np.exp(y_pred), log=True, normalized=True)
+    assert isclose(NNSEL, 0.2, abs_tol=0.000001)
diff --git a/setup.py b/setup.py
@@ -24,7 +24,7 @@
 MAINTAINER_EMAIL = "arthur.raney@noaa.gov"
 
 # Namespace package version
-VERSION = "1.3.4+1"
+VERSION = "1.3.5+1"
 URL = "https://github.com/NOAA-OWP/evaluation_tools"
 
 # Map subpackage namespace to relative location