From 18aa54631131b9c97684b5e4b212f8bc0851638a Mon Sep 17 00:00:00 2001 From: Jason Regina Date: Tue, 30 Mar 2021 16:22:01 -0500 Subject: [PATCH 1/6] add nash sutcliffe efficiency --- .../evaluation_tools/metrics/metrics.py | 47 +++++++++++++++++++ python/metrics/tests/test_metrics.py | 27 +++++++++++ 2 files changed, 74 insertions(+) diff --git a/python/metrics/evaluation_tools/metrics/metrics.py b/python/metrics/evaluation_tools/metrics/metrics.py index 26a5b0bc..11696346 100644 --- a/python/metrics/evaluation_tools/metrics/metrics.py +++ b/python/metrics/evaluation_tools/metrics/metrics.py @@ -19,12 +19,59 @@ - percent_correct - base_chance - equitable_threat_score + - mean_squared_error + - nash_sutcliffe_efficiency """ +import numpy as np import pandas as pd from typing import Union +def mean_squared_error( + y_true, + y_pred, + root=False + ): + """MSE""" + MSE = np.sum(np.subtract(y_true, y_pred) ** 2.0) / len(y_true) + + if root: + return np.sqrt(MSE) + return MSE + +def nash_sutcliffe_efficiency( + y_true, + y_pred, + log=False, + normalized=False + ): + """NSE + + Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through + conceptual models part I—A discussion of principles. Journal of + hydrology, 10(3), 282-290. + + Nossent, J., & Bauwens, W. (2012, April). Application of a normalized + Nash-Sutcliffe efficiency to improve the accuracy of the Sobol' + sensitivity analysis of a hydrological model. In EGU General Assembly + Conference Abstracts (p. 237). + + """ + # Optionally transform components + if log: + y_true = np.log(y_true) + y_pred = np.log(y_pred) + + # Compute components + numerator = mean_squared_error(y_true, y_pred) + denominator = mean_squared_error(y_true, np.mean(y_true)) + + # Compute score, optionally normalize + if normalized: + return 1.0 / (1.0 + numerator/denominator) + return 1.0 - numerator/denominator + def compute_contingency_table( observed: pd.Series, simulated: pd.Series, diff --git a/python/metrics/tests/test_metrics.py b/python/metrics/tests/test_metrics.py index 98007d80..3d884ec2 100644 --- a/python/metrics/tests/test_metrics.py +++ b/python/metrics/tests/test_metrics.py @@ -3,6 +3,7 @@ import pandas as pd from math import isclose +import numpy as np contigency_table = { 'true_positive': 1, @@ -18,6 +19,9 @@ 'TN': 4 } +y_true = [1., 2., 3., 4.] +y_pred = [4., 3., 2., 1.] + def test_compute_contingency_table(): obs = pd.Categorical([True, False, False, True, True, True, False, False, False, False]) @@ -132,3 +136,26 @@ def test_equitable_threat_score(): true_negative_key='TN' ) assert isclose(ETS, (-0.2/4.8), abs_tol=0.000001) + +def test_mean_squared_error(): + MSE = metrics.mean_squared_error(y_true, y_pred) + assert MSE == 5.0 + + RMSE = metrics.mean_squared_error(y_true, y_pred, root=True) + assert RMSE == np.sqrt(5.0) + +def test_nash_sutcliffe_efficiency(): + NSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred) + assert NSE == -3.0 + + NNSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred, + normalized=True) + assert NNSE == 0.2 + + NSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true), + np.exp(y_pred), log=True) + assert NSEL == -3.0 + + NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true), + np.exp(y_pred), log=True, normalized=True) + assert NNSEL == 0.2 From d043b664dfbc0d2aa979d0ac29fce70288f900c3 Mon Sep 17 00:00:00 2001 From: Jason Regina Date: Thu, 1 Apr 2021 14:11:15 -0500 Subject: [PATCH 2/6] add docstrings --- .../evaluation_tools/metrics/metrics.py | 78 ++++++++++++++----- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/python/metrics/evaluation_tools/metrics/metrics.py b/python/metrics/evaluation_tools/metrics/metrics.py index 11696346..347b5010 100644 --- a/python/metrics/evaluation_tools/metrics/metrics.py +++ b/python/metrics/evaluation_tools/metrics/metrics.py @@ -25,37 +25,75 @@ """ import numpy as np +import numpy.typing as npt import pandas as pd from typing import Union def mean_squared_error( - y_true, - y_pred, - root=False - ): - """MSE""" + y_true: npt.ArrayLike, + y_pred: npt.ArrayLike, + root: bool = False + ) -> float: + """Compute the mean squared error, or optionally root mean squared error. + + Parameters + ---------- + y_true: array-like of shape (n_samples,) or (n_samples, n_outputs) + Ground truth (correct) target values, also called observations, measurements, or observed values. + y_pred: pandas.Series, required + Estimated target values, also called simulations or modeled values. + root: bool, default False + When True, return the root mean squared error. + + Returns + ------- + error: float + Mean squared error or root mean squared error. + + """ + # Compute mean squared error MSE = np.sum(np.subtract(y_true, y_pred) ** 2.0) / len(y_true) + # Return MSE, optionally return root mean squared error if root: return np.sqrt(MSE) return MSE def nash_sutcliffe_efficiency( - y_true, - y_pred, - log=False, - normalized=False - ): - """NSE - + y_true: npt.ArrayLike, + y_pred: npt.ArrayLike, + log: bool = False, + normalized: bool = False + ) -> float: + """Compute the Nash–Sutcliffe model efficiency coefficient (NSE), also called the + mean squared error skill score or the R^2 (coefficient of determination) regression score. + + Parameters + ---------- + y_true: array-like of shape (n_samples,) or (n_samples, n_outputs) + Ground truth (correct) target values, also called observations, measurements, or observed values. + y_pred: pandas.Series, required + Estimated target values, also called simulations or modeled values. + log: bool, default False + When True, take the log of y_true and y_pred before computing the NSE. + normalized: bool, default False + When True, normalize the final NSE value using the method from + Nossent & Bauwens, 2012. + + Returns + ------- + score: float + Nash–Sutcliffe model efficiency coefficient + + References + ---------- Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through - conceptual models part I—A discussion of principles. Journal of - hydrology, 10(3), 282-290. - + conceptual models part I—A discussion of principles. Journal of + hydrology, 10(3), 282-290. Nossent, J., & Bauwens, W. (2012, April). Application of a normalized - Nash-Sutcliffe efficiency to improve the accuracy of the Sobol' - sensitivity analysis of a hydrological model. In EGU General Assembly - Conference Abstracts (p. 237). + Nash-Sutcliffe efficiency to improve the accuracy of the Sobol' + sensitivity analysis of a hydrological model. In EGU General Assembly + Conference Abstracts (p. 237). """ # Optionally transform components @@ -85,9 +123,9 @@ def compute_contingency_table( Parameters ---------- observed: pandas.Series, required - pandas.Series of boolean pandas.Categorical values indicating observed occurences + pandas.Series of boolean pandas.Categorical values indicating observed occurrences simulated: pandas.Series, required - pandas.Series of boolean pandas.Categorical values indicating simulated occurences + pandas.Series of boolean pandas.Categorical values indicating simulated occurrences true_positive_key: str, optional, default 'true_positive' Label to use for true positives. false_positive_key: str, optional, default 'false_positive' From afa17253317a0f1686f9b6658a81582da25a5e6c Mon Sep 17 00:00:00 2001 From: Jason Regina Date: Thu, 1 Apr 2021 14:11:45 -0500 Subject: [PATCH 3/6] test new mse and nse methods --- python/metrics/tests/test_metrics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/metrics/tests/test_metrics.py b/python/metrics/tests/test_metrics.py index 3d884ec2..04587e5a 100644 --- a/python/metrics/tests/test_metrics.py +++ b/python/metrics/tests/test_metrics.py @@ -159,3 +159,4 @@ def test_nash_sutcliffe_efficiency(): NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true), np.exp(y_pred), log=True, normalized=True) assert NNSEL == 0.2 + print(NNSEL) From 226e2722fc848a3447754f3753fe81097d2cd493 Mon Sep 17 00:00:00 2001 From: Jason Regina Date: Thu, 1 Apr 2021 14:21:46 -0500 Subject: [PATCH 4/6] update suite version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0e9a4b9c..d83142f4 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ MAINTAINER_EMAIL = "arthur.raney@noaa.gov" # Namespace package version -VERSION = "1.3.4+1" +VERSION = "1.3.5+1" URL = "https://github.com/NOAA-OWP/evaluation_tools" # Map subpackage namespace to relative location From b98dfe5fe431017cad30da40f831123dd50429a2 Mon Sep 17 00:00:00 2001 From: Jason Regina Date: Thu, 1 Apr 2021 14:21:56 -0500 Subject: [PATCH 5/6] update metrics version --- python/metrics/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/metrics/setup.py b/python/metrics/setup.py index fe948c52..bd8b9b24 100644 --- a/python/metrics/setup.py +++ b/python/metrics/setup.py @@ -13,7 +13,7 @@ SUBPACKAGE_SLUG = f"{NAMESPACE_PACKAGE_NAME}.{SUBPACKAGE_NAME}" # Subpackage version -VERSION = "0.1.1+1" +VERSION = "0.1.2+1" # Package author information AUTHOR = "Jason Regina" From b4857bebd19c341df416539f62f303bd594d4b4b Mon Sep 17 00:00:00 2001 From: Jason Regina Date: Thu, 1 Apr 2021 15:08:16 -0500 Subject: [PATCH 6/6] update docstring to specify natural logarithm --- python/metrics/evaluation_tools/metrics/metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/metrics/evaluation_tools/metrics/metrics.py b/python/metrics/evaluation_tools/metrics/metrics.py index 347b5010..295b0429 100644 --- a/python/metrics/evaluation_tools/metrics/metrics.py +++ b/python/metrics/evaluation_tools/metrics/metrics.py @@ -75,7 +75,8 @@ def nash_sutcliffe_efficiency( y_pred: pandas.Series, required Estimated target values, also called simulations or modeled values. log: bool, default False - When True, take the log of y_true and y_pred before computing the NSE. + Apply numpy.log (natural logarithm) to y_true and y_pred + before computing the NSE. normalized: bool, default False When True, normalize the final NSE value using the method from Nossent & Bauwens, 2012.