diff --git a/python/metrics/evaluation_tools/metrics/metrics.py b/python/metrics/evaluation_tools/metrics/metrics.py
index 26a5b0bc..295b0429 100644
--- a/python/metrics/evaluation_tools/metrics/metrics.py
+++ b/python/metrics/evaluation_tools/metrics/metrics.py
@@ -19,12 +19,98 @@
  - percent_correct
  - base_chance
  - equitable_threat_score
+ - mean_squared_error
+ - nash_sutcliffe_efficiency
 """
 
+import numpy as np
+import numpy.typing as npt
 import pandas as pd
 from typing import Union
 
 
+def mean_squared_error(
+    y_true: npt.ArrayLike,
+    y_pred: npt.ArrayLike,
+    root: bool = False
+    ) -> float:
+    """Compute the mean squared error, or optionally the root mean squared error.
+
+    Parameters
+    ----------
+    y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values, also called observations,
+        measurements, or observed values.
+    y_pred: array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Estimated target values, also called simulations or modeled values.
+    root: bool, default False
+        When True, return the root mean squared error.
+
+    Returns
+    -------
+    error: float
+        Mean squared error or root mean squared error.
+
+    """
+    # Compute mean squared error
+    MSE = np.sum(np.subtract(y_true, y_pred) ** 2.0) / len(y_true)
+
+    # Return MSE, optionally return root mean squared error
+    if root:
+        return np.sqrt(MSE)
+    return MSE
+
+def nash_sutcliffe_efficiency(
+    y_true: npt.ArrayLike,
+    y_pred: npt.ArrayLike,
+    log: bool = False,
+    normalized: bool = False
+    ) -> float:
+    """Compute the Nash–Sutcliffe model efficiency coefficient (NSE), also called
+    the mean squared error skill score or the R^2 (coefficient of determination)
+    regression score.
+
+    Parameters
+    ----------
+    y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Ground truth (correct) target values, also called observations,
+        measurements, or observed values.
+    y_pred: array-like of shape (n_samples,) or (n_samples, n_outputs)
+        Estimated target values, also called simulations or modeled values.
+    log: bool, default False
+        Apply numpy.log (natural logarithm) to y_true and y_pred
+        before computing the NSE.
+    normalized: bool, default False
+        When True, normalize the final NSE value using the method from
+        Nossent & Bauwens, 2012.
+
+    Returns
+    -------
+    score: float
+        Nash–Sutcliffe model efficiency coefficient
+
+    References
+    ----------
+    Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through
+        conceptual models part I—A discussion of principles. Journal of
+        Hydrology, 10(3), 282-290.
+    Nossent, J., & Bauwens, W. (2012, April). Application of a normalized
+        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol'
+        sensitivity analysis of a hydrological model. In EGU General Assembly
+        Conference Abstracts (p. 237).
+ + """ + # Optionally transform components + if log: + y_true = np.log(y_true) + y_pred = np.log(y_pred) + + # Compute components + numerator = mean_squared_error(y_true, y_pred) + denominator = mean_squared_error(y_true, np.mean(y_true)) + + # Compute score, optionally normalize + if normalized: + return 1.0 / (1.0 + numerator/denominator) + return 1.0 - numerator/denominator + def compute_contingency_table( observed: pd.Series, simulated: pd.Series, @@ -38,9 +124,9 @@ def compute_contingency_table( Parameters ---------- observed: pandas.Series, required - pandas.Series of boolean pandas.Categorical values indicating observed occurences + pandas.Series of boolean pandas.Categorical values indicating observed occurrences simulated: pandas.Series, required - pandas.Series of boolean pandas.Categorical values indicating simulated occurences + pandas.Series of boolean pandas.Categorical values indicating simulated occurrences true_positive_key: str, optional, default 'true_positive' Label to use for true positives. false_positive_key: str, optional, default 'false_positive' diff --git a/python/metrics/setup.py b/python/metrics/setup.py index fe948c52..bd8b9b24 100644 --- a/python/metrics/setup.py +++ b/python/metrics/setup.py @@ -13,7 +13,7 @@ SUBPACKAGE_SLUG = f"{NAMESPACE_PACKAGE_NAME}.{SUBPACKAGE_NAME}" # Subpackage version -VERSION = "0.1.1+1" +VERSION = "0.1.2+1" # Package author information AUTHOR = "Jason Regina" diff --git a/python/metrics/tests/test_metrics.py b/python/metrics/tests/test_metrics.py index 98007d80..04587e5a 100644 --- a/python/metrics/tests/test_metrics.py +++ b/python/metrics/tests/test_metrics.py @@ -3,6 +3,7 @@ import pandas as pd from math import isclose +import numpy as np contigency_table = { 'true_positive': 1, @@ -18,6 +19,9 @@ 'TN': 4 } +y_true = [1., 2., 3., 4.] +y_pred = [4., 3., 2., 1.] + def test_compute_contingency_table(): obs = pd.Categorical([True, False, False, True, True, True, False, False, False, False]) @@ -132,3 +136,27 @@ def test_equitable_threat_score(): true_negative_key='TN' ) assert isclose(ETS, (-0.2/4.8), abs_tol=0.000001) + +def test_mean_squared_error(): + MSE = metrics.mean_squared_error(y_true, y_pred) + assert MSE == 5.0 + + RMSE = metrics.mean_squared_error(y_true, y_pred, root=True) + assert RMSE == np.sqrt(5.0) + +def test_nash_sutcliffe_efficiency(): + NSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred) + assert NSE == -3.0 + + NNSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred, + normalized=True) + assert NNSE == 0.2 + + NSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true), + np.exp(y_pred), log=True) + assert NSEL == -3.0 + + NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true), + np.exp(y_pred), log=True, normalized=True) + assert NNSEL == 0.2 + print(NNSEL) diff --git a/setup.py b/setup.py index 0e9a4b9c..d83142f4 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ MAINTAINER_EMAIL = "arthur.raney@noaa.gov" # Namespace package version -VERSION = "1.3.4+1" +VERSION = "1.3.5+1" URL = "https://github.com/NOAA-OWP/evaluation_tools" # Map subpackage namespace to relative location