Add AICc for bias correction with small sample sizes (#727)
martinvonk committed Apr 11, 2024
1 parent bce0bac commit eedba33
Showing 5 changed files with 116 additions and 26 deletions.
2 changes: 1 addition & 1 deletion pastas/model.py
@@ -1806,7 +1806,7 @@ def fit_report(
"EVP": f"{self.stats.evp():.2f}",
"R2": f"{self.stats.rsq():.2f}",
"RMSE": f"{self.stats.rmse():.2f}",
"AIC": f"{self.stats.aic():.2f}",
"AICc": f"{self.stats.aicc():.2f}",
"BIC": f"{self.stats.bic():.2f}",
"Obj": f"{self.solver.obj_func:.2f}",
"___": "",
27 changes: 26 additions & 1 deletion pastas/modelstats.py
@@ -47,6 +47,7 @@ class Statistics:
"kge",
"bic",
"aic",
"aicc",
]

def __init__(self, ml: Model):
@@ -374,7 +375,7 @@ def aic(
See Also
--------
pastas.stats.bic
pastas.stats.aic
"""
nparam = self.ml.parameters["vary"].sum()
if self.ml.settings["noise"]:
@@ -385,6 +386,30 @@ def aic(
res = self.ml.residuals(tmin=tmin, tmax=tmax)
return metrics.aic(res=res, nparam=nparam)

@model_tmin_tmax
def aicc(
self, tmin: Optional[TimestampType] = None, tmax: Optional[TimestampType] = None
) -> float:
"""Akaike Information Criterium with second order bias correction (AICc).
Parameters
----------
tmin: str or pandas.Timestamp, optional
tmax: str or pandas.Timestamp, optional
See Also
--------
pastas.stats.aicc
"""
nparam = self.ml.parameters["vary"].sum()
if self.ml.settings["noise"]:
res = self.ml.noise(tmin=tmin, tmax=tmax) * self.ml.noise_weights(
tmin=tmin, tmax=tmax
)
else:
res = self.ml.residuals(tmin=tmin, tmax=tmax)
return metrics.aicc(res=res, nparam=nparam)

@model_tmin_tmax
def summary(
self,
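For context (not part of the diff): a minimal usage sketch of the new model-level statistic. It assumes the usual pastas model-building API (ps.Model, ps.StressModel with an rfunc instance, ml.solve) and uses entirely synthetic, hypothetical data, so treat it as an illustration rather than a tested recipe.

import numpy as np
import pandas as pd
import pastas as ps

# Hypothetical synthetic data: a daily precipitation stress and a head series
# loosely derived from it, just to have something to calibrate on.
idx = pd.date_range("2000-01-01", "2004-12-31", freq="D")
rng = np.random.default_rng(0)
prec = pd.Series(rng.gamma(0.5, 4.0, idx.size), index=idx, name="prec")
head = pd.Series(
    1.0 + 0.002 * prec.rolling(90, min_periods=1).sum()
    + rng.normal(0.0, 0.05, idx.size),
    index=idx,
    name="head",
)

ml = ps.Model(head)
ml.add_stressmodel(ps.StressModel(prec, rfunc=ps.Gamma(), name="recharge", settings="prec"))
ml.solve(report=False)

# The statistic added in this commit, next to the existing criteria. As in the
# method above, it is computed on the weighted noise when a noise model is
# used and on the residuals otherwise.
print(ml.stats.aic(), ml.stats.aicc(), ml.stats.bic())

With only a handful of observations the AICc penalty on extra parameters is noticeably larger than the plain AIC penalty; for a long daily series like this one the two are nearly identical.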
9 changes: 5 additions & 4 deletions pastas/plotting/modelcompare.py
@@ -842,17 +842,18 @@ def plot_table_metrics(
name of labeled axes to plot table on, by default "met"
metric_selection : list, optional
list of str describing which metrics to include, by default None which
uses ["rsq", "aic"].
uses ["rsq", "aicc"].
"""
if metric_selection is None:
metric_selection = ["rsq", "aic"]
metric_selection = ["rsq", "aicc"]

metrics = self.get_metrics(self.models, metric_selection=metric_selection)
for met in ["aic", "bic"]:
for met in ["aic", "aicc", "bic"]:
if met in metrics.index:
metrics.loc[met] -= metrics.loc[met].min()
metname = "AICc" if met == "aicc" else met.upper()
metrics = metrics.rename(
index={met: f"\N{GREEK CAPITAL LETTER DELTA}{met.upper()}"}
index={met: f"\N{GREEK CAPITAL LETTER DELTA}{metname}"}
)
if "rsq" in metrics.index:
metrics = metrics.rename(index={"rsq": "R\N{SUPERSCRIPT TWO}"})
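A side note on the table logic above (not from the commit): information criteria are only meaningful relative to one another, which is why plot_table_metrics subtracts the column-wise minimum so the best model shows a delta of 0. The same normalization on a plain DataFrame, with made-up metric values and hypothetical model names:

import pandas as pd

# Hypothetical R^2 and AICc values for three candidate models.
metrics = pd.DataFrame(
    {"ml_a": [0.95, -120.3], "ml_b": [0.93, -118.9], "ml_c": [0.96, -112.1]},
    index=["rsq", "aicc"],
)

# Mirror of the loop in plot_table_metrics: shift each criterion so the best
# (lowest) model reads 0 and the others show their distance from it.
for met in ["aic", "aicc", "bic"]:
    if met in metrics.index:
        metrics.loc[met] -= metrics.loc[met].min()

print(metrics)  # ml_a shows 0.0 for aicc; ml_b and ml_c show their deltas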
72 changes: 65 additions & 7 deletions pastas/stats/metrics.py
@@ -31,14 +31,15 @@
"rsq",
"bic",
"aic",
"aicc",
"pearsonr",
"kge",
]

captureWarnings(True)
logger = getLogger(__name__)
warnings_logger = getLogger("py.warnings")
warnings_logger.addHandler(logger)
logger.addHandler(warnings_logger)


# Absolute Error Metrics
@@ -483,13 +484,14 @@ def aic(
sim: pandas.Series, optional
The Series with the simulated values.
res: pandas.Series, optional
The Series with the residual values. If time series for the residuals are
provided, the sim and obs arguments are ignored. Note that the residuals
must be computed as `obs - sim` here.
The Series with the residual values. If time series for the residuals
are provided, the sim and obs arguments are ignored. Note that the
residuals must be computed as `obs - sim` here.
nparam: int, optional
number of calibrated parameters.
missing: str, optional
string with the rule to deal with missing values. Only "drop" is supported now.
string with the rule to deal with missing values. Only "drop" is
supported now.
Notes
-----
@@ -498,8 +500,14 @@ def aic(
.. math:: \\text{AIC} = -2 log(L) + 2 nparam
where :math:`n_{param}` is the number of calibration parameters and L is the
likelihood function for the model.
where :math:`n_{param}` is the number of calibration parameters and L is
the likelihood function for the model. In the case of ordinary least
squares:

.. math:: log(L) = - (nobs / 2) * log(RSS / nobs)

where RSS denotes the residual sum of squares and nobs the number of
observations.
"""
err = _compute_err(obs=obs, sim=sim, res=res, missing=missing)

@@ -513,6 +521,56 @@ def aic(
return n * log((err.to_numpy() ** 2.0).sum() / n) + 2.0 * nparam


def aicc(
obs: Optional[Series] = None,
sim: Optional[Series] = None,
res: Optional[Series] = None,
missing: str = "drop",
nparam: int = 1,
) -> float:
"""Compute the Akaike Information Criterium with second order
bias correction for the number of observations (AICc)
Parameters
----------
obs: pandas.Series, optional
Series with the observed values.
sim: pandas.Series, optional
The Series with the simulated values.
res: pandas.Series, optional
The Series with the residual values. If time series for the residuals
are provided, the sim and obs arguments are ignored. Note that the
residuals must be computed as `obs - sim` here.
nparam: int, optional
number of calibrated parameters.
missing: str, optional
string with the rule to deal with missing values. Only "drop" is
supported now.

Notes
-----
The corrected Akaike Information Criterion (AICc)
:cite:p:`suguria_aicc_1978` is computed as follows:

.. math:: \\text{AICc} = -2 log(L) + 2 nparam + (2 nparam (nparam + 1) / (nobs - nparam - 1))

where :math:`n_{param}` is the number of calibration parameters, nobs is
the number of observations, and L is the likelihood function for the model.
In the case of ordinary least squares:

.. math:: log(L) = - (nobs / 2) * log(RSS / nobs)

where RSS denotes the residual sum of squares.
"""
err = _compute_err(obs=obs, sim=sim, res=res, missing=missing)

n = err.index.size

c_term = (2 * nparam * (nparam + 1)) / (n - nparam - 1)
return aic(res=-err, nparam=nparam) + c_term


# Forecast Error Metrics


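To make the relationship between the two functions concrete, here is a short check (not part of the commit) that only uses the public ps.stats.metrics calls shown in this diff and random, hypothetical series; it verifies that aicc equals aic plus the documented correction term, which shrinks as the number of observations grows.

import numpy as np
import pandas as pd
import pastas as ps

idx = pd.date_range("2010-01-01", periods=25, freq="D")
rng = np.random.default_rng(1)
obs = pd.Series(rng.normal(10.0, 1.0, idx.size), index=idx)
sim = obs + rng.normal(0.0, 0.1, idx.size)

nparam = 3
aic = ps.stats.metrics.aic(obs=obs, sim=sim, nparam=nparam)
aicc = ps.stats.metrics.aicc(obs=obs, sim=sim, nparam=nparam)

# AICc = AIC + 2 k (k + 1) / (n - k - 1), so the two agree for large n.
n = obs.size
c_term = 2 * nparam * (nparam + 1) / (n - nparam - 1)
assert np.isclose(aicc, aic + c_term)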
32 changes: 19 additions & 13 deletions tests/test_metrics.py
@@ -3,61 +3,67 @@

import pastas as ps

obs = pd.Series([10.0, 15.0, 20.0, 25.0])
sim = pd.Series([12.0, 18.0, 22.0, 28.0])
index = pd.date_range("2010-01-01", "2010-01-04", freq="D")
obs = pd.Series([10.0, 15.21, 20.0, 25.2], index=index)
sim = pd.Series([10.1, 15.0, 20.3, 25.0], index=index)
tol = 1e-6


def test_mae():
mae = ps.stats.metrics.mae(obs=obs, sim=sim)
assert pytest.approx(mae, tol) == 2.5
assert pytest.approx(mae, tol) == 0.2025


def test_rmse():
rmse = ps.stats.metrics.rmse(obs=obs, sim=sim)
assert pytest.approx(rmse, tol) == 2.549509
assert pytest.approx(rmse, tol) == 0.2145343


def test_sse():
sse = ps.stats.metrics.sse(obs=obs, sim=sim)
assert pytest.approx(sse, tol) == 26.0
assert pytest.approx(sse, tol) == 0.1841


def test_pearsonr():
r = ps.stats.metrics.pearsonr(obs=obs, sim=sim)
assert pytest.approx(r, tol) == 0.997054
assert pytest.approx(r, tol) == 0.999299


def test_evp():
evp = ps.stats.metrics.evp(obs=obs, sim=sim)
assert pytest.approx(evp, tol) == 99.2
assert pytest.approx(evp, tol) == 99.85505


def test_nse():
nse = ps.stats.metrics.nse(obs=obs, sim=sim)
assert pytest.approx(nse, tol) == 0.792
assert pytest.approx(nse, tol) == 0.99855


def test_rsq():
rsq = ps.stats.metrics.rsq(obs=obs, sim=sim)
assert pytest.approx(rsq, tol) == 0.792
assert pytest.approx(rsq, tol) == 0.99855


def test_bic():
bic = ps.stats.metrics.bic(obs=obs, sim=sim)
assert pytest.approx(bic, tol) == 8.873503
assert pytest.approx(bic, tol) == -10.9279878


def test_aic():
aic = ps.stats.metrics.aic(obs=obs, sim=sim)
assert pytest.approx(aic, tol) == 9.487208
assert pytest.approx(aic, tol) == -10.314282


def test_aicc():
aicc = ps.stats.metrics.aicc(obs=obs, sim=sim)
assert pytest.approx(aicc, tol) == -8.314282


def test_kge():
kge = ps.stats.metrics.kge(obs=obs, sim=sim)
assert pytest.approx(kge, tol) == 0.850761
assert pytest.approx(kge, tol) == 0.9923303


def test_kge_modified():
kgem = ps.stats.metrics.kge(obs=obs, sim=sim, modified=True)
assert pytest.approx(kgem, tol) == 0.832548
assert pytest.approx(kgem, tol) == 0.99247
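
As a sanity check on the expected values above (not part of the test file): the gap between test_aicc and test_aic is exactly 2.0, which is just the correction term for the four-observation series and the default nparam = 1:

n, k = 4, 1                          # observations in obs/sim, default nparam
c_term = 2 * k * (k + 1) / (n - k - 1)
print(c_term)                        # 2.0 == -8.314282 - (-10.314282)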
