diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 6e077de2134..b15e683f290 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -27,7 +27,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
diff --git a/.github/workflows/generate-documentation.yml b/.github/workflows/generate-documentation.yml
index 60730e23c1b..2fe0b96278c 100644
--- a/.github/workflows/generate-documentation.yml
+++ b/.github/workflows/generate-documentation.yml
@@ -25,18 +25,18 @@ jobs:
python-version: ["3.10"]
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checkout statsmodels.github.io without token
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 1
repository: statsmodels/statsmodels.github.io
path: statsmodels.github.io
if: ${{ github.event_name == 'pull_request' }}
- name: Checkout statsmodels.github.io with token
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 1
repository: statsmodels/statsmodels.github.io
@@ -52,7 +52,7 @@ jobs:
- name: Install graphviz
uses: ts-graphviz/setup-graphviz@v1
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
diff --git a/.gitignore b/.gitignore
index 38442588fb7..0c32bf53261 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,10 +18,13 @@ examples/executed
#setuptools-scm generated version file
statsmodels/_version.py
-# generated c source and built extensions
+# Generated c source and built extensions
+# and Byte-compiled / optimized / DLL files
*.c
*.so
-*.pyd
+*.py[cod]
+__pycache__/
+
# repository directories for bzr-git
.bzr
@@ -104,3 +107,6 @@ statsmodels/tsa/statespace/_smoothers/_classical.pyx
# Temporary copies for packaging
statsmodels/setup.cfg
statsmodels/LICENSE.txt
+
+# Dataset downloads
+statsmodels/datasets/tests/*.zip
\ No newline at end of file
diff --git a/INSTALL.txt b/INSTALL.txt
index 6e7aa94f8dd..fb77f2f5269 100644
--- a/INSTALL.txt
+++ b/INSTALL.txt
@@ -21,7 +21,7 @@ patsy >= 0.5.2
patsy.readthedocs.org
-cython >= 0.29.26
+cython >= 0.29.33 and < 4.0.0
https://cython.org/
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 41cbd092543..bf8ed8bbbd9 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -16,7 +16,6 @@ variables:
VML_NUM_THREADS: 1
OPENBLAS_NUM_THREADS: 1
PYTHONHASHSEED: 0 # Ensure tests are correctly gathered by xdist
- SETUPTOOLS_USE_DISTUTILS: "stdlib"
USE_MATPLOTLIB: true
jobs:
diff --git a/docs/source/install.rst b/docs/source/install.rst
index 8d2741d131d..e35e7677c9c 100644
--- a/docs/source/install.rst
+++ b/docs/source/install.rst
@@ -134,7 +134,7 @@ The current minimum dependencies are:
Cython is required to build from a git checkout but not to run or install from PyPI:
-* `Cython `__ >= 0.29.26 is required to build the code from
+* `Cython `__ >= 0.29.33 is required to build the code from
github but not from a source distribution.
Given the long release cycle, statsmodels follows a loose time-based policy for
diff --git a/pyproject.toml b/pyproject.toml
index 0b6e99556b8..a8af48ebc85 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,14 +2,15 @@
# These are strictly build requirements. Runtime requirements are listed in
# INSTALL_REQUIRES in setup.py
requires = [
- "setuptools>=59.2.0",
- "cython>=0.29.26,<3", # Sync with CYTHON_MIN_VER in setup
+ "setuptools>=69.0.2; python_version>='3.12'",
+ "setuptools>=63.4.3",
+ "cython>=0.29.33,<4", # Sync with CYTHON_MIN_VER in setup
# Workaround for oldest supported numpy using 1.21.6, but SciPy 1.9.2+ requiring 1.22.3+
"oldest-supported-numpy; python_version!='3.10' or platform_system!='Windows' or platform_python_implementation=='PyPy'",
"numpy>=1.22.3,<2; python_version=='3.10' and platform_system=='Windows' and platform_python_implementation != 'PyPy'",
"numpy<2; python_version>='3.12'",
"scipy>=1.4",
- "setuptools_scm[toml]>=7,<8"
+ "setuptools_scm[toml]>=8,<9"
]
build-backend = "setuptools.build_meta"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 06fcef5930d..aa05144d6e9 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,6 +1,6 @@
# build
-cython>=0.29.28,<3.0.0
-setuptools_scm[toml]~=7.0
+cython>=0.29.33,<4.0.0
+setuptools_scm[toml]~=8.0
oldest-supported-numpy>=2022.4.18
# run
@@ -14,6 +14,7 @@ joblib
pytest>=7.3.0
pytest-randomly
pytest-xdist
+pytest-cov
# Pin on Win32
pywinpty; os_name == "nt"
diff --git a/requirements.txt b/requirements.txt
index 3e293f86e31..2d5bf7d9e2f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,5 @@ numpy >=1.18,<2 # released December 2019
scipy>=1.4,!=1.9.2 # released December 2019
scipy>=1.4,!=1.9.2; sys_platform == "win32" # Blocked 1.9.2 due to Windows issues
pandas>=1.0,!=2.1.0 # released January 2020, 2.1.0 blocked due to bug
-patsy>=0.5.2 # released January 2018
+patsy>=0.5.4 # released January 2018
packaging>=21.3 # released Nov 2021
diff --git a/setup.cfg b/setup.cfg
index 57c41e0abff..f18bd97ff67 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -23,7 +23,6 @@ filterwarnings =
ignore:The --strict option is deprecated:pytest.PytestDeprecationWarning:
error:genfromdta:FutureWarning
error:StataReader:FutureWarning
- error:Estimation of VARMA:statsmodels.tools.sm_exceptions.EstimationWarning
error:Care should be used:UserWarning
error::statsmodels.tools.sm_exceptions.HypothesisTestWarning
error::statsmodels.tools.sm_exceptions.SpecificationWarning
@@ -79,6 +78,21 @@ filterwarnings =
error:A value is trying to be set on a copy::
error:Conversion of an array with ndim:DeprecationWarning:
error:Series.__getitem__ treating keys:FutureWarning:
+ error:'Y' is deprecated, please use 'YE' instead:FutureWarning
+ error:'A' is deprecated, please use 'YE' instead:FutureWarning
+ error:'H' is deprecated and will:FutureWarning
+ error:'M' is deprecated, please use:FutureWarning
+ error:'Q' is deprecated, please use 'QE' instead:FutureWarning
+ error:'Q-DEC' is deprecated, please use 'QE-DEC' instead:FutureWarning
+ error:'Q-JAN' is deprecated, please use 'QE-JAN' instead:FutureWarning
+ error:'BQ-MAR' is deprecated, please use 'BQE-MAR' instead:FutureWarning
+ error:'AS-MAR' is deprecated and will be removed in a future version:FutureWarning
+ error:Downcasting object dtype arrays on:FutureWarning
+ error:The previous implementation of stack is deprecated:FutureWarning
+ error:Series.__setitem__ treating keys as positions is deprecated:FutureWarning
+ error:The provided callable:FutureWarning
+ error:divide by zero encountered in log1p:RuntimeWarning
+
markers =
example: mark a test that runs example code
matplotlib: mark a test that requires matplotlib
diff --git a/setup.py b/setup.py
index fb4adc7b4e3..7a353eff111 100644
--- a/setup.py
+++ b/setup.py
@@ -8,6 +8,7 @@
from setuptools import Command, Extension, find_packages, setup
from setuptools.dist import Distribution
+from packaging.version import parse
from collections import defaultdict
import fnmatch
import inspect
@@ -25,15 +26,16 @@
FORCE_C = int(os.environ.get("SM_FORCE_C", 0))
if FORCE_C:
raise ImportError("Force import error for testing")
- from Cython import Tempita
+ from Cython import Tempita, __version__ as cython_version
from Cython.Build import cythonize
from Cython.Distutils import build_ext
HAS_CYTHON = True
+ CYTHON_3 = parse(cython_version) >= parse("3.0")
except ImportError:
from setuptools.command.build_ext import build_ext
- HAS_CYTHON = False
+ HAS_CYTHON = CYTHON_3 = False
try:
import numpy # noqa: F401
@@ -57,7 +59,7 @@
for line in req.readlines():
DEVELOP_REQUIRES.append(line.split("#")[0].strip())
-CYTHON_MIN_VER = "0.29.26" # released 2020
+CYTHON_MIN_VER = "0.29.33" # released January 2023
EXTRAS_REQUIRE = {
"build": ["cython>=" + CYTHON_MIN_VER],
@@ -346,8 +348,9 @@ def process_tempita(source_name):
for extension in extensions:
requires_math = extension.name in EXT_REQUIRES_NUMPY_MATH_LIBS
update_extension(extension, requires_math=requires_math)
-
if HAS_CYTHON:
+ if CYTHON_3:
+ COMPILER_DIRECTIVES["cpow"] = True
extensions = cythonize(
extensions,
compiler_directives=COMPILER_DIRECTIVES,
diff --git a/statsmodels/base/tests/test_optimize.py b/statsmodels/base/tests/test_optimize.py
index 9a17f43a972..e550dafe678 100644
--- a/statsmodels/base/tests/test_optimize.py
+++ b/statsmodels/base/tests/test_optimize.py
@@ -85,7 +85,13 @@ def test_full_output_false(reset_randomstate):
else:
xopt, retvals = func(
- dummy_func, dummy_score, [1], (), {}, full_output=False, disp=0
+ dummy_func,
+ dummy_score,
+ [1.0],
+ (),
+ {},
+ full_output=False,
+ disp=0
)
assert_(retvals is None)
if method == "powell" and SP_LT_15:
@@ -102,7 +108,7 @@ def test_full_output(reset_randomstate):
xopt, retvals = func(
dummy_func,
dummy_score,
- [1],
+ [1.0],
(),
{},
hess=dummy_hess,
@@ -112,7 +118,13 @@ def test_full_output(reset_randomstate):
else:
xopt, retvals = func(
- dummy_func, dummy_score, [1], (), {}, full_output=True, disp=0
+ dummy_func,
+ dummy_score,
+ [1.0],
+ (),
+ {},
+ full_output=True,
+ disp=0
)
assert_(retvals is not None)
@@ -130,7 +142,7 @@ def test_minimize_scipy_slsqp():
xopt, _ = func(
dummy_bounds_constraint_func,
None,
- (2, 0),
+ (2.0, 0.0),
(),
{
"min_method": "SLSQP",
diff --git a/statsmodels/compat/pandas.py b/statsmodels/compat/pandas.py
index 5b804942032..3af29c946d5 100644
--- a/statsmodels/compat/pandas.py
+++ b/statsmodels/compat/pandas.py
@@ -32,11 +32,17 @@
"call_cached_func",
"PD_LT_1_4",
"PD_LT_2",
+ "MONTH_END",
+ "QUARTER_END",
+ "YEAR_END",
+ "FUTURE_STACK",
]
version = parse(pd.__version__)
-PD_LT_1_0_0 = version < Version("1.0.0")
+PD_LT_2_2_0 = version < Version("2.1.99")
+PD_LT_2_1_0 = version < Version("2.0.99")
+PD_LT_1_0_0 = version < Version("0.99.0")
PD_LT_1_4 = version < Version("1.3.99")
PD_LT_2 = version < Version("1.9.99")
@@ -174,3 +180,9 @@ def call_cached_func(cached_prop, *args, **kwargs):
def get_cached_doc(cached_prop) -> Optional[str]:
return get_cached_func(cached_prop).__doc__
+
+
+MONTH_END = "M" if PD_LT_2_2_0 else "ME"
+QUARTER_END = "Q" if PD_LT_2_2_0 else "QE"
+YEAR_END = "Y" if PD_LT_2_2_0 else "YE"
+FUTURE_STACK = {} if PD_LT_2_1_0 else {"future_stack": True}
diff --git a/statsmodels/discrete/tests/test_discrete.py b/statsmodels/discrete/tests/test_discrete.py
index fb79818e3f8..c74d324212a 100644
--- a/statsmodels/discrete/tests/test_discrete.py
+++ b/statsmodels/discrete/tests/test_discrete.py
@@ -2586,16 +2586,15 @@ def test_cov_confint_pandas():
assert isinstance(ci.index, pd.MultiIndex)
-def test_t_test():
+def test_mlogit_t_test():
# GH669, check t_test works in multivariate model
- data = load_anes96()
+ data = sm.datasets.anes96.load()
exog = sm.add_constant(data.exog, prepend=False)
res1 = sm.MNLogit(data.endog, exog).fit(disp=0)
r = np.ones(res1.cov_params().shape[0])
t1 = res1.t_test(r)
f1 = res1.f_test(r)
- data = sm.datasets.anes96.load()
exog = sm.add_constant(data.exog, prepend=False)
endog, exog = np.asarray(data.endog), np.asarray(exog)
res2 = sm.MNLogit(endog, exog).fit(disp=0)
@@ -2604,3 +2603,20 @@ def test_t_test():
assert_allclose(t1.effect, t2.effect)
assert_allclose(f1.statistic, f2.statistic)
+
+ tt = res1.t_test(np.eye(np.size(res2.params)))
+ assert_allclose(tt.tvalue.reshape(6,6, order="F"), res1.tvalues.to_numpy())
+ tt = res2.t_test(np.eye(np.size(res2.params)))
+ assert_allclose(tt.tvalue.reshape(6,6, order="F"), res2.tvalues)
+
+ wt = res1.wald_test(np.eye(np.size(res2.params))[0], scalar=True)
+ assert_allclose(wt.pvalue, res1.pvalues.to_numpy()[0, 0])
+
+
+ tt = res1.t_test("y1_logpopul")
+ wt = res1.wald_test("y1_logpopul", scalar=True)
+ assert_allclose(tt.pvalue, wt.pvalue)
+
+ wt = res1.wald_test("y1_logpopul, y2_logpopul", scalar=True)
+ # regression test
+ assert_allclose(wt.statistic, 5.68660562, rtol=1e-8)
diff --git a/statsmodels/discrete/tests/test_sandwich_cov.py b/statsmodels/discrete/tests/test_sandwich_cov.py
index 2d641207602..70738aa9415 100644
--- a/statsmodels/discrete/tests/test_sandwich_cov.py
+++ b/statsmodels/discrete/tests/test_sandwich_cov.py
@@ -9,6 +9,8 @@
import os
import numpy as np
import pandas as pd
+import pytest
+
import statsmodels.discrete.discrete_model as smd
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families
@@ -499,6 +501,21 @@ def test_score_test(self):
res_lm2 = res2.score_test(exog_extra, cov_type='nonrobust')
assert_allclose(np.hstack(res_lm1), np.hstack(res_lm2), rtol=5e-7)
+ def test_margeff(self):
+ if (isinstance(self.res2.model, OLS) or
+ hasattr(self.res1.model, "offset")):
+ pytest.skip("not available yet")
+
+ marg1 = self.res1.get_margeff()
+ marg2 = self.res2.get_margeff()
+ assert_allclose(marg1.margeff, marg2.margeff, rtol=1e-10)
+ assert_allclose(marg1.margeff_se, marg2.margeff_se, rtol=1e-10)
+
+ marg1 = self.res1.get_margeff(count=True, dummy=True)
+ marg2 = self.res2.get_margeff(count=True, dummy=True)
+ assert_allclose(marg1.margeff, marg2.margeff, rtol=1e-10)
+ assert_allclose(marg1.margeff_se, marg2.margeff_se, rtol=1e-10)
+
class TestGLMPoisson(CheckDiscreteGLM):
diff --git a/statsmodels/emplike/originregress.py b/statsmodels/emplike/originregress.py
index 15013c36639..28956a5e3b6 100644
--- a/statsmodels/emplike/originregress.py
+++ b/statsmodels/emplike/originregress.py
@@ -207,7 +207,7 @@ def el_test(self, b0_vals, param_nums, method='nm',
def conf_int_el(self, param_num, upper_bound=None,
lower_bound=None, sig=.05, method='nm',
- stochastic_exog=1):
+ stochastic_exog=True):
"""
Returns the confidence interval for a regression parameter when the
regression is forced through the origin.
@@ -217,22 +217,20 @@ def conf_int_el(self, param_num, upper_bound=None,
param_num : int
The parameter number to be tested. Note this uses python
indexing but the '0' parameter refers to the intercept term.
-
upper_bound : float
The maximum value the upper confidence limit can be. The
closer this is to the confidence limit, the quicker the
computation. Default is .00001 confidence limit under normality.
-
lower_bound : float
The minimum value the lower confidence limit can be.
Default is .00001 confidence limit under normality.
-
sig : float, optional
The significance level. Default .05.
-
method : str, optional
Algorithm to optimize of nuisance params. Can be 'nm' or
'powell'. Default is 'nm'.
+ stochastic_exog : bool
+ Default is True.
Returns
-------
@@ -247,9 +245,14 @@ def conf_int_el(self, param_num, upper_bound=None,
if lower_bound is None:
ci = np.asarray(self.model.fit().conf_int(.0001))
lower_bound = (np.squeeze(ci[param_num])[0])
- f = lambda b0: self.el_test(np.array([b0]), param_num,
- method=method,
- stochastic_exog=stochastic_exog)[0] - r0
+
+ def f(b0):
+ b0 = np.array([b0])
+ val = self.el_test(
+ b0, param_num, method=method, stochastic_exog=stochastic_exog
+ )
+ return val[0] - r0
+
_param = np.squeeze(self.params[param_num])
lowerl = optimize.brentq(f, np.squeeze(lower_bound), _param)
upperl = optimize.brentq(f, _param, np.squeeze(upper_bound))
diff --git a/statsmodels/genmod/generalized_linear_model.py b/statsmodels/genmod/generalized_linear_model.py
index c0799b3d7e8..849e7c61aaa 100644
--- a/statsmodels/genmod/generalized_linear_model.py
+++ b/statsmodels/genmod/generalized_linear_model.py
@@ -54,6 +54,7 @@
# need import in module instead of lazily to copy `__doc__`
from . import families
+
__all__ = ['GLM', 'PredictionResultsMean']
@@ -624,6 +625,90 @@ def information(self, params, scale=None):
scale = float_like(scale, "scale", optional=True)
return self.hessian(params, scale=scale, observed=False)
+ def _derivative_exog(self, params, exog=None, transform="dydx",
+ dummy_idx=None, count_idx=None,
+ offset=None, exposure=None):
+ """
+ Derivative of mean, expected endog with respect to the parameters
+ """
+ if exog is None:
+ exog = self.exog
+ if (offset is not None) or (exposure is not None):
+ raise NotImplementedError("offset and exposure not supported")
+
+ lin_pred = self.predict(params, exog, which="linear",
+ offset=offset, exposure=exposure)
+
+ k_extra = getattr(self, 'k_extra', 0)
+ params_exog = params if k_extra == 0 else params[:-k_extra]
+
+ margeff = (self.family.link.inverse_deriv(lin_pred)[:, None] *
+ params_exog)
+ if 'ex' in transform:
+ margeff *= exog
+ if 'ey' in transform:
+ mean = self.family.link.inverse(lin_pred)
+ margeff /= mean[:,None]
+
+ return self._derivative_exog_helper(margeff, params, exog,
+ dummy_idx, count_idx, transform)
+
+ def _derivative_exog_helper(self, margeff, params, exog, dummy_idx,
+ count_idx, transform):
+ """
+ Helper for _derivative_exog to wrap results appropriately
+ """
+ from statsmodels.discrete.discrete_margins import (
+ _get_count_effects,
+ _get_dummy_effects,
+ )
+
+ if count_idx is not None:
+ margeff = _get_count_effects(margeff, exog, count_idx, transform,
+ self, params)
+ if dummy_idx is not None:
+ margeff = _get_dummy_effects(margeff, exog, dummy_idx, transform,
+ self, params)
+
+ return margeff
+
+ def _derivative_predict(self, params, exog=None, transform='dydx',
+ offset=None, exposure=None):
+ """
+ Derivative of the expected endog with respect to the parameters.
+
+ Parameters
+ ----------
+ params : ndarray
+ parameter at which score is evaluated
+ exog : ndarray or None
+ Explanatory variables at which derivative are computed.
+ If None, then the estimation exog is used.
+ offset, exposure : None
+ Not yet implemented.
+
+ Returns
+ -------
+ The value of the derivative of the expected endog with respect
+ to the parameter vector.
+ """
+ # core part is same as derivative_mean_params
+ # additionally handles exog and transform
+ if exog is None:
+ exog = self.exog
+ if (offset is not None) or (exposure is not None) or (
+ getattr(self, 'offset', None) is not None):
+ raise NotImplementedError("offset and exposure not supported")
+
+ lin_pred = self.predict(params, exog=exog, which="linear")
+ idl = self.family.link.inverse_deriv(lin_pred)
+ dmat = exog * idl[:, None]
+ if 'ey' in transform:
+ mean = self.family.link.inverse(lin_pred)
+ dmat /= mean[:, None]
+
+ return dmat
+
def _deriv_mean_dparams(self, params):
"""
Derivative of the expected endog with respect to the parameters.
@@ -2210,6 +2295,92 @@ def get_distribution(self, exog=None, exposure=None,
mu, scale, var_weights=var_weights, **kwds)
return distr
+ def get_margeff(self, at='overall', method='dydx', atexog=None,
+ dummy=False, count=False):
+ """Get marginal effects of the fitted model.
+
+ Warning: offset, exposure and weights (var_weights and freq_weights)
+ are not supported by margeff.
+
+ Parameters
+ ----------
+ at : str, optional
+ Options are:
+
+ - 'overall', The average of the marginal effects at each
+ observation.
+ - 'mean', The marginal effects at the mean of each regressor.
+ - 'median', The marginal effects at the median of each regressor.
+ - 'zero', The marginal effects at zero for each regressor.
+ - 'all', The marginal effects at each observation. If `at` is all
+ only margeff will be available from the returned object.
+
+ Note that if `exog` is specified, then marginal effects for all
+ variables not specified by `exog` are calculated using the `at`
+ option.
+ method : str, optional
+ Options are:
+
+ - 'dydx' - dy/dx - No transformation is made and marginal effects
+ are returned. This is the default.
+ - 'eyex' - estimate elasticities of variables in `exog` --
+ d(lny)/d(lnx)
+ - 'dyex' - estimate semi-elasticity -- dy/d(lnx)
+ - 'eydx' - estimate semi-elasticity -- d(lny)/dx
+
+ Note that tranformations are done after each observation is
+ calculated. Semi-elasticities for binary variables are computed
+ using the midpoint method. 'dyex' and 'eyex' do not make sense
+ for discrete variables. For interpretations of these methods
+ see notes below.
+ atexog : array_like, optional
+ Optionally, you can provide the exogenous variables over which to
+ get the marginal effects. This should be a dictionary with the key
+ as the zero-indexed column number and the value of the dictionary.
+ Default is None for all independent variables less the constant.
+ dummy : bool, optional
+ If False, treats binary variables (if present) as continuous. This
+ is the default. Else if True, treats binary variables as
+ changing from 0 to 1. Note that any variable that is either 0 or 1
+ is treated as binary. Each binary variable is treated separately
+ for now.
+ count : bool, optional
+ If False, treats count variables (if present) as continuous. This
+ is the default. Else if True, the marginal effect is the
+ change in probabilities when each observation is increased by one.
+
+ Returns
+ -------
+ DiscreteMargins : marginal effects instance
+ Returns an object that holds the marginal effects, standard
+ errors, confidence intervals, etc. See
+ `statsmodels.discrete.discrete_margins.DiscreteMargins` for more
+ information.
+
+ Notes
+ -----
+ Interpretations of methods:
+
+ - 'dydx' - change in `endog` for a change in `exog`.
+ - 'eyex' - proportional change in `endog` for a proportional change
+ in `exog`.
+ - 'dyex' - change in `endog` for a proportional change in `exog`.
+ - 'eydx' - proportional change in `endog` for a change in `exog`.
+
+ When using after Poisson, returns the expected number of events per
+ period, assuming that the model is loglinear.
+
+ Status : unsupported features offset, exposure and weights. Default
+ handling of freq_weights for average effect "overall" might change.
+
+ """
+ if getattr(self.model, "offset", None) is not None:
+ raise NotImplementedError("Margins with offset are not available.")
+ if (np.any(self.model.var_weights != 1) or
+ np.any(self.model.freq_weights != 1)):
+ warnings.warn("weights are not taken into account by margeff")
+ from statsmodels.discrete.discrete_margins import DiscreteMargins
+ return DiscreteMargins(self, (at, method, atexog, dummy, count))
@Appender(base.LikelihoodModelResults.remove_data.__doc__)
def remove_data(self):
diff --git a/statsmodels/graphics/tests/test_tsaplots.py b/statsmodels/graphics/tests/test_tsaplots.py
index 2776bbc9b35..5ba24f02a0f 100644
--- a/statsmodels/graphics/tests/test_tsaplots.py
+++ b/statsmodels/graphics/tests/test_tsaplots.py
@@ -1,3 +1,4 @@
+from statsmodels.compat.pandas import MONTH_END
from statsmodels.compat.python import lmap
import calendar
@@ -362,7 +363,9 @@ def test_predict_plot(use_pandas, model_and_args, alpha):
y[i] += 1.8 * y[i - 1] - 0.9 * y[i - 2]
y = y[100:]
if use_pandas:
- index = pd.date_range("1960-1-1", freq="M", periods=y.shape[0] + 24)
+ index = pd.date_range(
+ "1960-1-1", freq=MONTH_END, periods=y.shape[0] + 24
+ )
start = index[index.shape[0] // 2]
end = index[-1]
y = pd.Series(y, index=index[:-24])
@@ -372,3 +375,14 @@ def test_predict_plot(use_pandas, model_and_args, alpha):
res = model(y, **kwargs).fit()
fig = plot_predict(res, start, end, alpha=alpha)
assert isinstance(fig, plt.Figure)
+
+
+@pytest.mark.matplotlib
+def test_plot_pacf_small_sample():
+ idx = [pd.Timestamp.now() + pd.Timedelta(seconds=i) for i in range(10)]
+ df = pd.DataFrame(
+ index=idx,
+ columns=["a"],
+ data=list(range(10))
+ )
+ plot_pacf(df)
diff --git a/statsmodels/graphics/tsaplots.py b/statsmodels/graphics/tsaplots.py
index ddeec5293cb..c3358cd2829 100644
--- a/statsmodels/graphics/tsaplots.py
+++ b/statsmodels/graphics/tsaplots.py
@@ -17,7 +17,7 @@ def _prepare_data_corr_plot(x, lags, zero):
if lags is None:
# GH 4663 - use a sensible default value
nobs = x.shape[0]
- lim = min(int(np.ceil(10 * np.log10(nobs))), nobs - 1)
+ lim = min(int(np.ceil(10 * np.log10(nobs))), nobs // 2)
lags = np.arange(not zero, lim + 1)
elif np.isscalar(lags):
lags = np.arange(not zero, int(lags) + 1) # +1 for zero lag
diff --git a/statsmodels/imputation/tests/test_mice.py b/statsmodels/imputation/tests/test_mice.py
index 9086c509e1b..7d89260cdde 100644
--- a/statsmodels/imputation/tests/test_mice.py
+++ b/statsmodels/imputation/tests/test_mice.py
@@ -36,17 +36,17 @@ def gendat():
Create a data set with missing values.
"""
- np.random.seed(34243)
+ gen = np.random.RandomState(34243)
n = 200
p = 5
- exog = np.random.normal(size=(n, p))
+ exog = gen.normal(size=(n, p))
exog[:, 0] = exog[:, 1] - exog[:, 2] + 2*exog[:, 4]
- exog[:, 0] += np.random.normal(size=n)
+ exog[:, 0] += gen.normal(size=n)
exog[:, 2] = 1*(exog[:, 2] > 0)
- endog = exog.sum(1) + np.random.normal(size=n)
+ endog = exog.sum(1) + gen.normal(size=n)
df = pd.DataFrame(exog)
df.columns = ["x%d" % k for k in range(1, p+1)]
@@ -97,7 +97,12 @@ def test_default(self):
fml = 'x1 ~ x2 + x3 + x4 + x5 + y'
assert_equal(imp_data.conditional_formula['x1'], fml)
- assert_equal(imp_data._cycle_order, ['x5', 'x3', 'x4', 'y', 'x2', 'x1'])
+ # Order of 3 and 4 is not deterministic
+ # since both have 10 missing
+ assert tuple(imp_data._cycle_order) in (
+ ('x5', 'x3', 'x4', 'y', 'x2', 'x1'),
+ ('x5', 'x4', 'x3', 'y', 'x2', 'x1')
+ )
# Should make a copy
assert not (df is imp_data.data)
@@ -161,17 +166,21 @@ def test_pertmeth(self):
assert_equal(imp_data.data.shape[1], ncol)
assert_allclose(orig[mx], imp_data.data[mx])
- assert_equal(imp_data._cycle_order, ['x5', 'x3', 'x4', 'y', 'x2', 'x1'])
-
+ # Order of 3 and 4 is not deterministic
+ # since both have 10 missing
+ assert tuple(imp_data._cycle_order) in (
+ ('x5', 'x3', 'x4', 'y', 'x2', 'x1'),
+ ('x5', 'x4', 'x3', 'y', 'x2', 'x1')
+ )
def test_phreg(self):
- np.random.seed(8742)
+ gen = np.random.RandomState(8742)
n = 300
- x1 = np.random.normal(size=n)
- x2 = np.random.normal(size=n)
- event_time = np.random.exponential(size=n) * np.exp(x1)
- obs_time = np.random.exponential(size=n)
+ x1 = gen.normal(size=n)
+ x2 = gen.normal(size=n)
+ event_time = gen.exponential(size=n) * np.exp(x1)
+ obs_time = gen.exponential(size=n)
time = np.where(event_time < obs_time, event_time, obs_time)
status = np.where(time == event_time, 1, 0)
df = pd.DataFrame({"time": time, "status": status, "x1": x1, "x2": x2})
@@ -236,7 +245,13 @@ def test_set_imputer(self):
fml = 'x4 ~ x1 + x2 + x3 + x5 + y'
assert_equal(imp_data.conditional_formula['x4'], fml)
- assert_equal(imp_data._cycle_order, ['x5', 'x3', 'x4', 'y', 'x2', 'x1'])
+ # Order of 3 and 4 is not deterministic
+ # since both have 10 missing
+ assert tuple(imp_data._cycle_order) in (
+ ('x5', 'x3', 'x4', 'y', 'x2', 'x1'),
+ ('x5', 'x4', 'x3', 'y', 'x2', 'x1')
+ )
+
@pytest.mark.matplotlib
@@ -354,10 +369,10 @@ def test_MICE2(self):
@pytest.mark.slow
def t_est_combine(self):
- np.random.seed(3897)
- x1 = np.random.normal(size=300)
- x2 = np.random.normal(size=300)
- y = x1 + x2 + np.random.normal(size=300)
+ gen = np.random.RandomState(3897)
+ x1 = gen.normal(size=300)
+ x2 = gen.normal(size=300)
+ y = x1 + x2 + gen.normal(size=300)
x1[0:100] = np.nan
x2[250:] = np.nan
df = pd.DataFrame({"x1": x1, "x2": x2, "y": y})
@@ -377,8 +392,8 @@ def t_est_combine(self):
def test_micedata_miss1():
# test for #4375
- np.random.seed(0)
- data = pd.DataFrame(np.random.rand(50, 4))
+ gen = np.random.RandomState(3897)
+ data = pd.DataFrame(gen.rand(50, 4))
data.columns = ['var1', 'var2', 'var3', 'var4']
# one column with a single missing value
data.iloc[1, 1] = np.nan
diff --git a/statsmodels/iolib/summary.py b/statsmodels/iolib/summary.py
index 01f850b8886..88f9f364c02 100644
--- a/statsmodels/iolib/summary.py
+++ b/statsmodels/iolib/summary.py
@@ -450,7 +450,11 @@ def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
params_stubs = xname
exog_idx = lrange(len(xname))
-
+ params = np.asarray(params)
+ std_err = np.asarray(std_err)
+ tvalues = np.asarray(tvalues)
+ pvalues = np.asarray(pvalues)
+ conf_int = np.asarray(conf_int)
params_data = lzip([forg(params[i], prec=4) for i in exog_idx],
[forg(std_err[i]) for i in exog_idx],
[forg(tvalues[i]) for i in exog_idx],
@@ -469,7 +473,8 @@ def summary_params(results, yname=None, xname=None, alpha=.05, use_t=True,
def summary_params_frame(results, yname=None, xname=None, alpha=.05,
use_t=True):
- '''create a summary table for the parameters
+ """
+ Create a summary table for the parameters
Parameters
----------
@@ -492,7 +497,7 @@ def summary_params_frame(results, yname=None, xname=None, alpha=.05,
Returns
-------
params_table : SimpleTable instance
- '''
+ """
# Parameters part of the summary table
# ------------------------------------
@@ -529,7 +534,7 @@ def summary_params_frame(results, yname=None, xname=None, alpha=.05,
def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
title=None):
- '''create summary table of regression parameters with several equations
+ """create summary table of regression parameters with several equations
This allows interleaving of parameters with bse and/or tvalues
@@ -555,7 +560,7 @@ def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
the merged table with results concatenated for each row of the parameter
array
- '''
+ """
if endog_names is None:
# TODO: note the [1:] is specific to current MNLogit
endog_names = ['endog_%d' % i for i in
@@ -590,7 +595,7 @@ def summary_params_2d(result, extras=None, endog_names=None, exog_names=None,
def summary_params_2dflat(result, endog_names=None, exog_names=None, alpha=0.05,
use_t=True, keep_headers=True, endog_cols=False):
- '''summary table for parameters that are 2d, e.g. multi-equation models
+ """summary table for parameters that are 2d, e.g. multi-equation models
Parameters
----------
@@ -621,7 +626,7 @@ def summary_params_2dflat(result, endog_names=None, exog_names=None, alpha=0.05,
the merged table with results concatenated for each row of the parameter
array
- '''
+ """
res = result
params = res.params
@@ -669,7 +674,7 @@ def summary_params_2dflat(result, endog_names=None, exog_names=None, alpha=0.05,
def table_extend(tables, keep_headers=True):
- '''extend a list of SimpleTables, adding titles to header of subtables
+ """extend a list of SimpleTables, adding titles to header of subtables
This function returns the merged table as a deepcopy, in contrast to the
SimpleTable extend method.
@@ -686,7 +691,7 @@ def table_extend(tables, keep_headers=True):
table_all : SimpleTable
merged tables as a single SimpleTable instance
- '''
+ """
from copy import deepcopy
for ii, t in enumerate(tables[:]): #[1:]:
t = deepcopy(t)
@@ -762,11 +767,11 @@ def __repr__(self):
return str(type(self)) + '\n"""\n' + self.__str__() + '\n"""'
def _repr_html_(self):
- '''Display as HTML in IPython notebook.'''
+ """Display as HTML in IPython notebook."""
return self.as_html()
def _repr_latex_(self):
- '''Display as LaTeX when converting IPython notebook to PDF.'''
+ """Display as LaTeX when converting IPython notebook to PDF."""
return self.as_latex()
def add_table_2cols(self, res, title=None, gleft=None, gright=None,
@@ -799,7 +804,7 @@ def add_table_2cols(self, res, title=None, gleft=None, gright=None,
def add_table_params(self, res, yname=None, xname=None, alpha=.05,
use_t=True):
- '''create and add a table for the parameter estimates
+ """create and add a table for the parameter estimates
Parameters
----------
@@ -820,7 +825,7 @@ def add_table_params(self, res, yname=None, xname=None, alpha=.05,
-------
None : table is attached
- '''
+ """
if res.params.ndim == 1:
table = summary_params(res, yname=yname, xname=xname, alpha=alpha,
use_t=use_t)
@@ -833,32 +838,32 @@ def add_table_params(self, res, yname=None, xname=None, alpha=.05,
self.tables.append(table)
def add_extra_txt(self, etext):
- '''add additional text that will be added at the end in text format
+ """add additional text that will be added at the end in text format
Parameters
----------
etext : list[str]
string with lines that are added to the text output.
- '''
+ """
self.extra_txt = '\n'.join(etext)
def as_text(self):
- '''return tables as string
+ """return tables as string
Returns
-------
txt : str
summary tables and extra text as one string
- '''
+ """
txt = summary_return(self.tables, return_fmt='text')
if self.extra_txt is not None:
txt = txt + '\n\n' + self.extra_txt
return txt
def as_latex(self):
- '''return tables as string
+ """return tables as string
Returns
-------
@@ -871,35 +876,35 @@ def as_latex(self):
It is recommended to use `as_latex_tabular` directly on the individual
tables.
- '''
+ """
latex = summary_return(self.tables, return_fmt='latex')
if self.extra_txt is not None:
latex = latex + '\n\n' + self.extra_txt.replace('\n', ' \\newline\n ')
return latex
def as_csv(self):
- '''return tables as string
+ """return tables as string
Returns
-------
csv : str
concatenated summary tables in comma delimited format
- '''
+ """
csv = summary_return(self.tables, return_fmt='csv')
if self.extra_txt is not None:
csv = csv + '\n\n' + self.extra_txt
return csv
def as_html(self):
- '''return tables as string
+ """return tables as string
Returns
-------
html : str
concatenated summary tables in HTML format
- '''
+ """
html = summary_return(self.tables, return_fmt='html')
if self.extra_txt is not None:
html = html + '
' + self.extra_txt.replace('\n', '
')
diff --git a/statsmodels/iolib/summary2.py b/statsmodels/iolib/summary2.py
index 64d85acdf27..d874096a29e 100644
--- a/statsmodels/iolib/summary2.py
+++ b/statsmodels/iolib/summary2.py
@@ -1,3 +1,4 @@
+from statsmodels.compat.pandas import FUTURE_STACK
from statsmodels.compat.python import lzip
import datetime
@@ -416,7 +417,7 @@ def _col_params(result, float_format='%.4f', stars=True, include_r2=False):
res.loc[idx, res.columns[0]] = res.loc[idx, res.columns[0]] + '*'
# Stack Coefs and Std.Errors
res = res.iloc[:, :2]
- res = res.stack()
+ res = res.stack(**FUTURE_STACK)
# Add R-squared
if include_r2:
@@ -520,10 +521,21 @@ def summary_col(results, float_format='%.4f', model_names=(), stars=False,
cols[i].columns = [colnames[i]]
def merg(x, y):
- return x.merge(y, how='outer', right_index=True,
- left_index=True)
+ return x.merge(y, how='outer', right_index=True, left_index=True)
+
+ # Changes due to how pandas 2.2.0 handles merge
+ index = list(cols[0].index)
+ for col in cols[1:]:
+ for key in col.index:
+ if key not in index:
+ index.append(key)
+ for special in (('R-squared', ''), ('R-squared Adj.', '')):
+ if special in index:
+ index.remove(special)
+ index.insert(len(index), special)
summ = reduce(merg, cols)
+ summ = summ.reindex(index)
if regressor_order:
varnames = summ.index.get_level_values(0).tolist()
@@ -538,7 +550,7 @@ def merg(x, y):
if drop_omitted:
for uo in unordered:
new_order.remove(uo)
- summ = summ.loc[new_order]
+ summ = summ.reindex(new_order, level=0)
idx = []
index = summ.index.get_level_values(0)
@@ -561,10 +573,6 @@ def merg(x, y):
for df, name in zip(cols, _make_unique([df.columns[0] for df in cols])):
df.columns = [name]
- def merg(x, y):
- return x.merge(y, how='outer', right_index=True,
- left_index=True)
-
info = reduce(merg, cols)
dat = pd.DataFrame(np.vstack([summ, info])) # pd.concat better, but error
dat.columns = summ.columns
diff --git a/statsmodels/iolib/tests/test_summary2.py b/statsmodels/iolib/tests/test_summary2.py
index 0d342949a40..f18c45ea09b 100644
--- a/statsmodels/iolib/tests/test_summary2.py
+++ b/statsmodels/iolib/tests/test_summary2.py
@@ -70,13 +70,14 @@ def test_summarycol_float_format(self):
reg2 = OLS(y2, x).fit()
actual = summary_col([reg1, reg2], float_format='%0.1f').as_text()
actual = '%s\n' % actual
- assert_equal(actual, desired)
starred = summary_col([reg1, reg2], stars=True, float_format='%0.1f')
assert "7.7***" in str(starred)
assert "12.4**" in str(starred)
assert "12.4***" not in str(starred)
+ assert_equal(actual, desired)
+
def test_summarycol_drop_omitted(self):
# gh-3702
x = [1, 5, 7, 3, 5]
diff --git a/statsmodels/multivariate/cancorr.py b/statsmodels/multivariate/cancorr.py
index f714c0fe8b4..408d9548ce0 100644
--- a/statsmodels/multivariate/cancorr.py
+++ b/statsmodels/multivariate/cancorr.py
@@ -34,9 +34,9 @@ class CanCorr(Model):
See Parameters.
cancorr : ndarray
The canonical correlation values
- y_cancoeff : ndarray
+ y_cancoef : ndarray
The canonical coefficients for endog
- x_cancoeff : ndarray
+ x_cancoef : ndarray
The canonical coefficients for exog
References
diff --git a/statsmodels/regression/linear_model.py b/statsmodels/regression/linear_model.py
index baab82c287a..34166588e93 100644
--- a/statsmodels/regression/linear_model.py
+++ b/statsmodels/regression/linear_model.py
@@ -796,7 +796,7 @@ def loglike(self, params):
-\frac{n}{2}\left(1+\log\left(\frac{2\pi}{n}\right)\right)
+\frac{1}{2}\log\left(\left|W\right|\right)
- where :math:`W` is a diagonal weight matrix and
+ where :math:`W` is a diagonal weight matrix,
:math:`\left|W\right|` is its determinant, and
:math:`SSR=\left(Y-\hat{Y}\right)^\prime W \left(Y-\hat{Y}\right)` is
the sum of the squared weighted residuals.
diff --git a/statsmodels/regression/tests/test_lme.py b/statsmodels/regression/tests/test_lme.py
index 3e453f956e0..571dcc8d5a7 100644
--- a/statsmodels/regression/tests/test_lme.py
+++ b/statsmodels/regression/tests/test_lme.py
@@ -1076,7 +1076,11 @@ def test_summary_col():
mod2 = MixedLM.from_formula('X ~ Y', d, groups=d['IDS'])
results2 = mod2.fit(start_params=sp2)
- out = summary_col([results1, results2], stars=True)
+ out = summary_col(
+ [results1, results2],
+ stars=True,
+ regressor_order=["Group Var", "Intercept", "X", "Y"]
+ )
s = ('\n=============================\n Y X \n'
'-----------------------------\nGroup Var 0.1955 1.3854 \n'
' (0.6032) (2.7377) \nIntercept -1.2672 3.4842* \n'
diff --git a/statsmodels/src/math.pxd b/statsmodels/src/math.pxd
index 9ed4555268a..6812e2f0197 100644
--- a/statsmodels/src/math.pxd
+++ b/statsmodels/src/math.pxd
@@ -6,16 +6,16 @@ from libc.string cimport memcpy
cdef extern from "numpy/npy_math.h":
np.float64_t NPY_PI
- np.float64_t npy_cabs(np.npy_cdouble z) nogil
- np.npy_cdouble npy_clog(np.npy_cdouble z) nogil
- np.npy_cdouble npy_cexp(np.npy_cdouble z) nogil
+ np.float64_t npy_cabs(np.npy_cdouble z) noexcept nogil
+ np.npy_cdouble npy_clog(np.npy_cdouble z) noexcept nogil
+ np.npy_cdouble npy_cexp(np.npy_cdouble z) noexcept nogil
-cdef inline np.float64_t zabs(np.complex128_t z) nogil:
+cdef inline np.float64_t zabs(np.complex128_t z) noexcept nogil:
cdef np.npy_cdouble x
memcpy(&x, &z, sizeof(z))
return npy_cabs(x)
-cdef inline np.complex128_t zlog(np.complex128_t z) nogil:
+cdef inline np.complex128_t zlog(np.complex128_t z) noexcept nogil:
cdef np.npy_cdouble x
cdef np.complex128_t out
memcpy(&x, &z, sizeof(z))
@@ -23,7 +23,7 @@ cdef inline np.complex128_t zlog(np.complex128_t z) nogil:
memcpy(&out, &x, sizeof(x))
return out
-cdef inline np.complex128_t zexp(np.complex128_t z) nogil:
+cdef inline np.complex128_t zexp(np.complex128_t z) noexcept nogil:
cdef np.npy_cdouble x
cdef np.complex128_t out
memcpy(&x, &z, sizeof(z))
diff --git a/statsmodels/stats/_adnorm.py b/statsmodels/stats/_adnorm.py
index 05b01620b3f..bed38c4fd02 100644
--- a/statsmodels/stats/_adnorm.py
+++ b/statsmodels/stats/_adnorm.py
@@ -5,6 +5,8 @@
Author: Josef Perktold and Scipy developers
License : BSD-3
"""
+import warnings
+
import numpy as np
from scipy import stats
@@ -64,8 +66,12 @@ def anderson_statistic(x, dist='norm', fit=True, params=(), axis=0):
sl2 = [slice(None)] * x.ndim
sl2[axis] = slice(None, None, -1)
sl2 = tuple(sl2)
- s = np.sum((2 * i[sl1] - 1.0) / nobs * (np.log(z) + np.log1p(-z[sl2])),
- axis=axis)
+ with warnings.catch_warnings():
+ warnings.filterwarnings(
+ "ignore", message="divide by zero encountered in log1p"
+ )
+ ad_values = (2 * i[sl1] - 1.0) / nobs * (np.log(z) + np.log1p(-z[sl2]))
+ s = np.sum(ad_values, axis=axis)
a2 = -nobs - s
return a2
diff --git a/statsmodels/stats/_delta_method.py b/statsmodels/stats/_delta_method.py
index 931080a6128..956e1531617 100644
--- a/statsmodels/stats/_delta_method.py
+++ b/statsmodels/stats/_delta_method.py
@@ -151,6 +151,9 @@ def var(self):
"""
g = self.grad()
var = (np.dot(g, self.cov_params) * g).sum(-1)
+
+ if var.ndim == 2:
+ var = var.T
return var
def se_vectorized(self):
diff --git a/statsmodels/stats/anova.py b/statsmodels/stats/anova.py
index e6bb583fe2e..e32bc7b85e7 100644
--- a/statsmodels/stats/anova.py
+++ b/statsmodels/stats/anova.py
@@ -1,14 +1,18 @@
+from statsmodels.compat.python import lrange
+
import numpy as np
-from scipy import stats
import pandas as pd
from pandas import DataFrame, Index
import patsy
+from scipy import stats
-from statsmodels.regression.linear_model import OLS
-from statsmodels.compat.python import lrange
-from statsmodels.formula.formulatools import (_remove_intercept_patsy,
- _has_intercept, _intercept_idx)
+from statsmodels.formula.formulatools import (
+ _has_intercept,
+ _intercept_idx,
+ _remove_intercept_patsy,
+)
from statsmodels.iolib import summary2
+from statsmodels.regression.linear_model import OLS
def _get_covariance(model, robust):
@@ -489,7 +493,7 @@ def __init__(self, data, depvar, subject, within=None, between=None,
self.subject = subject
if aggregate_func == 'mean':
- self.aggregate_func = np.mean
+ self.aggregate_func = pd.Series.mean
else:
self.aggregate_func = aggregate_func
@@ -640,7 +644,9 @@ def summary(self):
if __name__ == "__main__":
import pandas
+
from statsmodels.formula.api import ols
+
# in R
#library(car)
#write.csv(Moore, "moore.csv", row.names=FALSE)
diff --git a/statsmodels/stats/descriptivestats.py b/statsmodels/stats/descriptivestats.py
index 751c554047b..b4366e1b5c8 100644
--- a/statsmodels/stats/descriptivestats.py
+++ b/statsmodels/stats/descriptivestats.py
@@ -1,10 +1,11 @@
-from statsmodels.compat.pandas import Appender, is_numeric_dtype
+from statsmodels.compat.pandas import PD_LT_2, Appender, is_numeric_dtype
from statsmodels.compat.scipy import SP_LT_19
-from statsmodels.compat.pandas import PD_LT_2
+
from typing import Sequence, Union
import numpy as np
import pandas as pd
+
if PD_LT_2:
from pandas.core.dtypes.common import is_categorical_dtype
else:
@@ -438,7 +439,8 @@ def _mode(ser):
_df = df.copy()
for col in df:
if is_extension_array_dtype(df[col].dtype):
- _df[col] = _df[col].astype(object).fillna(np.nan)
+ if _df[col].isnull().any():
+ _df[col] = _df[col].fillna(np.nan)
except ImportError:
pass
@@ -587,7 +589,8 @@ def summary(self) -> SimpleTable:
A table instance supporting export to text, csv and LaTeX
"""
df = self.frame.astype(object)
- df = df.fillna("")
+ if df.isnull().any().any():
+ df = df.fillna("")
cols = [str(col) for col in df.columns]
stubs = [str(idx) for idx in df.index]
data = []
diff --git a/statsmodels/stats/diagnostic.py b/statsmodels/stats/diagnostic.py
index 504061bff79..07c1c4ecdce 100644
--- a/statsmodels/stats/diagnostic.py
+++ b/statsmodels/stats/diagnostic.py
@@ -32,12 +32,22 @@
from scipy import stats
from statsmodels.regression.linear_model import OLS, RegressionResultsWrapper
+from statsmodels.stats._adnorm import anderson_statistic, normal_ad
+from statsmodels.stats._lilliefors import (
+ kstest_exponential,
+ kstest_fit,
+ kstest_normal,
+ lilliefors,
+)
+from statsmodels.tools.validation import (
+ array_like,
+ bool_like,
+ dict_like,
+ float_like,
+ int_like,
+ string_like,
+)
from statsmodels.tsa.tsatools import lagmat
-from statsmodels.tools.validation import (array_like, int_like, bool_like,
- string_like, dict_like, float_like)
-from statsmodels.stats._lilliefors import (kstest_fit, lilliefors,
- kstest_normal, kstest_exponential)
-from statsmodels.stats._adnorm import normal_ad, anderson_statistic
__all__ = ["kstest_fit", "lilliefors", "kstest_normal", "kstest_exponential",
"normal_ad", "compare_cox", "compare_j", "acorr_breusch_godfrey",
@@ -1062,7 +1072,7 @@ def linear_reset(res, power=3, test_type="fitted", use_f=False,
raise ValueError("power must contains distinct integers all >= 2")
exog = res.model.exog
if test_type == "fitted":
- aug = res.fittedvalues[:, None]
+ aug = np.asarray(res.fittedvalues)[:, None]
elif test_type == "exog":
# Remove constant and binary
aug = res.model.exog
@@ -1283,7 +1293,7 @@ def linear_lm(resid, exog, func=None):
if func is None:
def func(x):
return np.power(x, 2)
-
+ exog = np.asarray(exog)
exog_aux = np.column_stack((exog, func(exog[:, 1:])))
nobs, k_vars = exog.shape
@@ -1609,7 +1619,7 @@ def breaks_cusumolsresid(resid, ddof=0):
Ploberger, Werner, and Walter Kramer. “The Cusum Test with OLS Residuals.”
Econometrica 60, no. 2 (March 1992): 271-285.
"""
- resid = resid.ravel()
+ resid = np.asarray(resid).ravel()
nobs = len(resid)
nobssigma2 = (resid ** 2).sum()
if ddof > 0:
diff --git a/statsmodels/stats/oneway.py b/statsmodels/stats/oneway.py
index 04dbd1d97da..943006d9644 100644
--- a/statsmodels/stats/oneway.py
+++ b/statsmodels/stats/oneway.py
@@ -586,7 +586,7 @@ def anova_oneway(data, groups=None, use_var="unequal", welch_correction=True,
This is the default.
"equal" : Variances are assumed to be equal across samples.
This is the standard Anova.
- "bf: Variances are not assumed to be equal across samples.
+ "bf" : Variances are not assumed to be equal across samples.
The method is Browne-Forsythe (1971) for testing equality of means
with the corrected degrees of freedom by Merothra. The original BF
degrees of freedom are available as additional attributes in the
diff --git a/statsmodels/stats/tests/test_anova_rm.py b/statsmodels/stats/tests/test_anova_rm.py
index 007cb0f3218..b5b97e49717 100644
--- a/statsmodels/stats/tests/test_anova_rm.py
+++ b/statsmodels/stats/tests/test_anova_rm.py
@@ -1,10 +1,13 @@
from statsmodels.compat.pandas import assert_frame_equal
+from numpy.testing import (
+ assert_array_almost_equal,
+ assert_equal,
+ assert_raises,
+)
import pandas as pd
-import numpy as np
+
from statsmodels.stats.anova import AnovaRM
-from numpy.testing import (assert_array_almost_equal, assert_raises,
- assert_equal)
DV = [7, 3, 6, 6, 5, 8, 6, 7,
7, 11, 9, 11, 10, 10, 11, 11,
@@ -141,7 +144,7 @@ def test_repeated_measures_aggregation():
df1 = AnovaRM(data, 'DV', 'id', within=['A', 'B', 'D']).fit()
double_data = pd.concat([data, data], axis=0)
df2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
- aggregate_func=np.mean).fit()
+ aggregate_func=pd.Series.mean).fit()
assert_frame_equal(df1.anova_table, df2.anova_table)
@@ -152,7 +155,7 @@ def test_repeated_measures_aggregation_one_subject_duplicated():
data2 = data2.reset_index()
df2 = AnovaRM(data2,
'DV', 'id', within=['A', 'B', 'D'],
- aggregate_func=np.mean).fit()
+ aggregate_func=pd.Series.mean).fit()
assert_frame_equal(df1.anova_table, df2.anova_table)
@@ -163,9 +166,9 @@ def test_repeated_measures_aggregate_func():
within=['A', 'B', 'D'])
m1 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
- aggregate_func=np.mean)
+ aggregate_func=pd.Series.mean)
m2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
- aggregate_func=np.median)
+ aggregate_func=pd.Series.median)
assert_raises(AssertionError, assert_equal,
m1.aggregate_func, m2.aggregate_func)
@@ -175,7 +178,7 @@ def test_repeated_measures_aggregate_func():
def test_repeated_measures_aggregate_func_mean():
double_data = pd.concat([data, data], axis=0)
m1 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
- aggregate_func=np.mean)
+ aggregate_func=pd.Series.mean)
m2 = AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
aggregate_func='mean')
@@ -197,7 +200,7 @@ def test_repeated_measures_aggregate_compare_with_ezANOVA():
double_data = pd.concat([data, data], axis=0)
df = (AnovaRM(double_data, 'DV', 'id', within=['A', 'B', 'D'],
- aggregate_func=np.mean)
+ aggregate_func=pd.Series.mean)
.fit()
.anova_table)
diff --git a/statsmodels/stats/tests/test_diagnostic.py b/statsmodels/stats/tests/test_diagnostic.py
index b1f803ed7eb..b05698e6414 100644
--- a/statsmodels/stats/tests/test_diagnostic.py
+++ b/statsmodels/stats/tests/test_diagnostic.py
@@ -1957,3 +1957,43 @@ def test_small_skip(reset_randomstate):
# M2 + fit(M1)-exp(fit(M2)) < 2.22e-16 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+
+@pytest.mark.smoke
+def test_diagnostics_pandas(reset_randomstate):
+ # GH 8879
+ n = 100
+ df = pd.DataFrame(
+ {
+ "y": np.random.rand(n),
+ "x": np.random.rand(n),
+ "z": np.random.rand(n)}
+ )
+ y, x = df["y"], add_constant(df["x"])
+
+ res = OLS(df["y"], add_constant(df[["x"]])).fit()
+ res_large = OLS(df["y"], add_constant(df[["x", "z"]])).fit()
+ res_other = OLS(df["y"], add_constant(df[["z"]])).fit()
+ smsdia.linear_reset(res_large)
+ smsdia.linear_reset(res_large, test_type="fitted")
+ smsdia.linear_reset(res_large, test_type="exog")
+ smsdia.linear_reset(res_large, test_type="princomp")
+ smsdia.het_goldfeldquandt(y, x)
+ smsdia.het_breuschpagan(res.resid, x)
+ smsdia.het_white(res.resid, x)
+ smsdia.het_arch(res.resid)
+ smsdia.acorr_breusch_godfrey(res)
+ smsdia.acorr_ljungbox(y)
+ smsdia.linear_rainbow(res)
+ smsdia.linear_lm(res.resid, x)
+ smsdia.linear_harvey_collier(res)
+ smsdia.acorr_lm(res.resid)
+ smsdia.breaks_cusumolsresid(res.resid)
+ smsdia.breaks_hansen(res)
+ smsdia.compare_cox(res, res_other)
+ smsdia.compare_encompassing(res, res_other)
+ smsdia.compare_j(res, res_other)
+ smsdia.recursive_olsresiduals(res)
+ smsdia.recursive_olsresiduals(
+ res, order_by=np.arange(y.shape[0] - 1, 0 - 1, -1)
+ )
+ smsdia.spec_white(res.resid, x)
diff --git a/statsmodels/stats/tests/test_weightstats.py b/statsmodels/stats/tests/test_weightstats.py
index 3ac7f9a39f6..60a4a37bd12 100644
--- a/statsmodels/stats/tests/test_weightstats.py
+++ b/statsmodels/stats/tests/test_weightstats.py
@@ -677,6 +677,45 @@ def test_ztest_ztost():
ztest_larger_mu_1s.method = 'One-sample z-Test'
ztest_larger_mu_1s.data_name = 'x'
+# > zt = z.test(x, sigma.x=0.46436662631627995, y, sigma.y=0.7069805008424409)
+# > cat_items(zt, "ztest_unequal.")
+ztest_unequal = Holder()
+ztest_unequal.statistic = 6.12808151466544
+ztest_unequal.p_value = 8.89450168270109e-10
+ztest_unequal.conf_int = np.array([1.19415646579981, 2.31720717056382])
+ztest_unequal.estimate = np.array([7.01818181818182, 5.2625])
+ztest_unequal.null_value = 0
+ztest_unequal.alternative = 'two.sided'
+ztest_unequal.usevar = 'unequal'
+ztest_unequal.method = 'Two-sample z-Test'
+ztest_unequal.data_name = 'x and y'
+
+# > zt = z.test(x, sigma.x=0.46436662631627995, y, sigma.y=0.7069805008424409, alternative="less")
+# > cat_items(zt, "ztest_smaller_unequal.")
+ztest_smaller_unequal = Holder()
+ztest_smaller_unequal.statistic = 6.12808151466544
+ztest_smaller_unequal.p_value = 0.999999999555275
+ztest_smaller_unequal.conf_int = np.array([np.nan, 2.22692874913371])
+ztest_smaller_unequal.estimate = np.array([7.01818181818182, 5.2625])
+ztest_smaller_unequal.null_value = 0
+ztest_smaller_unequal.alternative = 'less'
+ztest_smaller_unequal.usevar = 'unequal'
+ztest_smaller_unequal.method = 'Two-sample z-Test'
+ztest_smaller_unequal.data_name = 'x and y'
+
+# > zt = z.test(x, sigma.x=0.46436662631627995, y, sigma.y=0.7069805008424409, alternative="greater")
+# > cat_items(zt, "ztest_larger_unequal.")
+ztest_larger_unequal = Holder()
+ztest_larger_unequal.statistic = 6.12808151466544
+ztest_larger_unequal.p_value = 4.44725034576265e-10
+ztest_larger_unequal.conf_int = np.array([1.28443488722992, np.nan])
+ztest_larger_unequal.estimate = np.array([7.01818181818182, 5.2625])
+ztest_larger_unequal.null_value = 0
+ztest_larger_unequal.alternative = 'greater'
+ztest_larger_unequal.usevar = 'unequal'
+ztest_larger_unequal.method = 'Two-sample z-Test'
+ztest_larger_unequal.data_name = 'x and y'
+
alternatives = {'less': 'smaller',
'greater': 'larger',
@@ -733,6 +772,14 @@ def test(self):
alternative=alternatives[tc.alternative])
assert_allclose(ci, tc_conf_int - tc.null_value, rtol=1e-10)
+ # unequal variances
+ for tc in [ztest_unequal, ztest_smaller_unequal, ztest_larger_unequal]:
+ zstat, pval = ztest(x1, x2, value=tc.null_value,
+ alternative=alternatives[tc.alternative],
+ usevar="unequal")
+ assert_allclose(zstat, tc.statistic, rtol=1e-10)
+ assert_allclose(pval, tc.p_value, rtol=1e-10, atol=1e-16)
+
# 1 sample test copy-paste
d1 = self.d1
for tc in [ztest_mu_1s, ztest_smaller_mu_1s, ztest_larger_mu_1s]:
diff --git a/statsmodels/stats/weightstats.py b/statsmodels/stats/weightstats.py
index 9af1f5eed7e..57f136c9e91 100644
--- a/statsmodels/stats/weightstats.py
+++ b/statsmodels/stats/weightstats.py
@@ -1510,10 +1510,10 @@ def ztest(
'larger' : H1: difference in means larger than value
'smaller' : H1: difference in means smaller than value
- usevar : str, 'pooled'
- Currently, only 'pooled' is implemented.
+ usevar : str, 'pooled' or 'unequal'
If ``pooled``, then the standard deviation of the samples is assumed to be
- the same. see CompareMeans.ztest_ind for different options.
+ the same. If ``unequal``, then the standard deviation of the sample is
+ assumed to be different.
ddof : int
Degrees of freedom use in the calculation of the variance of the mean
estimate. In the case of comparing means this is one, however it can
@@ -1528,35 +1528,38 @@ def ztest(
Notes
-----
- usevar not implemented, is always pooled in two sample case
- use CompareMeans instead.
+ usevar can be pooled or unequal in two sample case
"""
# TODO: this should delegate to CompareMeans like ttest_ind
# However that does not implement ddof
- # usevar is not used, always pooled
+ # usevar can be pooled or unequal
- if usevar != "pooled":
- raise NotImplementedError('only usevar="pooled" is implemented')
+ if usevar not in {"pooled", "unequal"}:
+ raise NotImplementedError('usevar can only be "pooled" or "unequal"')
x1 = np.asarray(x1)
nobs1 = x1.shape[0]
x1_mean = x1.mean(0)
x1_var = x1.var(0)
+
if x2 is not None:
x2 = np.asarray(x2)
nobs2 = x2.shape[0]
x2_mean = x2.mean(0)
x2_var = x2.var(0)
- var_pooled = nobs1 * x1_var + nobs2 * x2_var
- var_pooled /= nobs1 + nobs2 - 2 * ddof
- var_pooled *= 1.0 / nobs1 + 1.0 / nobs2
+ if usevar == "pooled":
+ var = nobs1 * x1_var + nobs2 * x2_var
+ var /= nobs1 + nobs2 - 2 * ddof
+ var *= 1.0 / nobs1 + 1.0 / nobs2
+ elif usevar == "unequal":
+ var = x1_var / (nobs1 - ddof) + x2_var / (nobs2 - ddof)
else:
- var_pooled = x1_var / (nobs1 - ddof)
+ var = x1_var / (nobs1 - ddof)
x2_mean = 0
- std_diff = np.sqrt(var_pooled)
+ std_diff = np.sqrt(var)
# stat = x1_mean - x2_mean - value
return _zstat_generic(x1_mean, x2_mean, std_diff, alternative, diff=value)
diff --git a/statsmodels/tools/_testing.py b/statsmodels/tools/_testing.py
index 54f5c2861d4..3c1408b413d 100644
--- a/statsmodels/tools/_testing.py
+++ b/statsmodels/tools/_testing.py
@@ -40,6 +40,7 @@ def __call__(self, extra_args=None, exit=False):
print('Running pytest ' + ' '.join(cmd))
status = pytest.main(cmd)
if exit:
+ print(f"Exit status: {status}")
sys.exit(status)
except ImportError:
raise ImportError('pytest>=3 required to run the test')
diff --git a/statsmodels/tools/numdiff.py b/statsmodels/tools/numdiff.py
index 10296aa6108..c77df81b4e2 100644
--- a/statsmodels/tools/numdiff.py
+++ b/statsmodels/tools/numdiff.py
@@ -93,7 +93,7 @@
def _get_epsilon(x, s, epsilon, n):
if epsilon is None:
- h = EPS**(1. / s) * np.maximum(np.abs(x), 0.1)
+ h = EPS**(1. / s) * np.maximum(np.abs(np.asarray(x)), 0.1)
else:
if np.isscalar(epsilon):
h = np.empty(n)
diff --git a/statsmodels/tsa/ardl/_pss_critical_values/pss-process.py b/statsmodels/tsa/ardl/_pss_critical_values/pss-process.py
index 256dcd428a3..6cd0fc75cd5 100644
--- a/statsmodels/tsa/ardl/_pss_critical_values/pss-process.py
+++ b/statsmodels/tsa/ardl/_pss_critical_values/pss-process.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import FUTURE_STACK
+
from collections import defaultdict
import glob
import os
@@ -45,7 +47,7 @@
crit_vals[key] = [round(val, 7) for val in list(row)]
def setup_regressors(df, low_pow=3, high_pow=3, cut=70, log=False):
- s = df.stack().reset_index()
+ s = df.stack(**FUTURE_STACK).reset_index()
q = s.level_0 / 10000
y = stats.norm.ppf(q)
cv = s[0]
diff --git a/statsmodels/tsa/base/tests/test_base.py b/statsmodels/tsa/base/tests/test_base.py
index ccdb1daef93..0dc9dc7c345 100644
--- a/statsmodels/tsa/base/tests/test_base.py
+++ b/statsmodels/tsa/base/tests/test_base.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import PD_LT_2_2_0
+
from datetime import datetime
import numpy as np
@@ -9,6 +11,7 @@
from statsmodels.tools.testing import assert_equal
from statsmodels.tsa.base.tsa_model import TimeSeriesModel
+YE_APR = "A-APR" if PD_LT_2_2_0 else "YE-APR"
def test_pandas_nodates_index():
@@ -43,12 +46,13 @@ def test_predict_freq():
# there's a bug in pandas up to 0.10.2 for YearBegin
#dates = date_range("1972-4-1", "2007-4-1", freq="AS-APR")
- dates = pd.date_range("1972-4-30", "2006-4-30", freq="A-APR")
+
+ dates = pd.date_range("1972-4-30", "2006-4-30", freq=YE_APR)
series = pd.Series(x, index=dates)
model = TimeSeriesModel(series)
#npt.assert_(model.data.freq == "AS-APR")
# two possabilities due to future changes in pandas 2.2+
- assert model._index.freqstr in ("Y-APR", "A-APR")
+ assert model._index.freqstr in ("Y-APR", "A-APR", "YE-APR")
start, end, out_of_sample, _ = (
model._get_prediction_index("2006-4-30", "2016-4-30"))
@@ -57,7 +61,7 @@ def test_predict_freq():
#expected_dates = date_range("2006-12-31", "2016-12-31",
# freq="AS-APR")
- expected_dates = pd.date_range("2006-4-30", "2016-4-30", freq="A-APR")
+ expected_dates = pd.date_range("2006-4-30", "2016-4-30", freq=YE_APR)
assert_equal(predict_dates, expected_dates)
#ptesting.assert_series_equal(predict_dates, expected_dates)
@@ -66,7 +70,7 @@ def test_keyerror_start_date():
x = np.arange(1,36.)
# dates = date_range("1972-4-1", "2007-4-1", freq="AS-APR")
- dates = pd.date_range("1972-4-30", "2006-4-30", freq="A-APR")
+ dates = pd.date_range("1972-4-30", "2006-4-30", freq=YE_APR)
series = pd.Series(x, index=dates)
model = TimeSeriesModel(series)
diff --git a/statsmodels/tsa/base/tests/test_prediction.py b/statsmodels/tsa/base/tests/test_prediction.py
index 1fc46a5b44c..4721be67be4 100644
--- a/statsmodels/tsa/base/tests/test_prediction.py
+++ b/statsmodels/tsa/base/tests/test_prediction.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import MONTH_END
+
import numpy as np
import pandas as pd
import pytest
@@ -12,7 +14,7 @@ def data(request):
variance = np.arange(1, 11.0)
if not request.param:
return mean, variance
- idx = pd.date_range("2000-1-1", periods=10, freq="M")
+ idx = pd.date_range("2000-1-1", periods=10, freq=MONTH_END)
return pd.Series(mean, index=idx), pd.Series(variance, index=idx)
diff --git a/statsmodels/tsa/base/tests/test_tsa_indexes.py b/statsmodels/tsa/base/tests/test_tsa_indexes.py
index 0d607fc3931..ae6939ddd2f 100644
--- a/statsmodels/tsa/base/tests/test_tsa_indexes.py
+++ b/statsmodels/tsa/base/tests/test_tsa_indexes.py
@@ -9,7 +9,7 @@
Author: Chad Fulton
License: BSD-3
"""
-from statsmodels.compat.pandas import is_int_index
+from statsmodels.compat.pandas import PD_LT_2_2_0, YEAR_END, is_int_index
import warnings
@@ -30,15 +30,16 @@
pd.DataFrame(base_dta),
]
+TWO_QE_DEC = "2Q-DEC" if PD_LT_2_2_0 else "2QE-DEC"
base_date_indexes = [
# (usual candidates)
pd.date_range(start="1950-01-01", periods=nobs, freq="D"),
pd.date_range(start="1950-01-01", periods=nobs, freq="W"),
pd.date_range(start="1950-01-01", periods=nobs, freq="MS"),
pd.date_range(start="1950-01-01", periods=nobs, freq="QS"),
- pd.date_range(start="1950-01-01", periods=nobs, freq="Y"),
+ pd.date_range(start="1950-01-01", periods=nobs, freq=YEAR_END),
# (some more complicated frequencies)
- pd.date_range(start="1950-01-01", periods=nobs, freq="2Q-DEC"),
+ pd.date_range(start="1950-01-01", periods=nobs, freq=TWO_QE_DEC),
pd.date_range(start="1950-01-01", periods=nobs, freq="2QS"),
pd.date_range(start="1950-01-01", periods=nobs, freq="5s"),
pd.date_range(start="1950-01-01", periods=nobs, freq="1D10min"),
@@ -221,7 +222,7 @@ def test_instantiation_valid():
if freq is None:
freq = ix.freq
if not isinstance(freq, str):
- freq = freq.freqstr
+ freq = ix.freqstr
assert_equal(
isinstance(mod._index, (pd.DatetimeIndex, pd.PeriodIndex)),
True,
@@ -242,7 +243,7 @@ def test_instantiation_valid():
if freq is None:
freq = ix.freq
if not isinstance(freq, str):
- freq = freq.freqstr
+ freq = ix.freqstr
assert_equal(
isinstance(mod._index, (pd.DatetimeIndex, pd.PeriodIndex)),
True,
@@ -296,7 +297,7 @@ def test_instantiation_valid():
if freq is None:
freq = ix.freq
if not isinstance(freq, str):
- freq = freq.freqstr
+ freq = ix.freqstr
assert_equal(
isinstance(mod._index, (pd.DatetimeIndex, pd.PeriodIndex)),
True,
diff --git a/statsmodels/tsa/deterministic.py b/statsmodels/tsa/deterministic.py
index 8d45c8189e7..6944730310e 100644
--- a/statsmodels/tsa/deterministic.py
+++ b/statsmodels/tsa/deterministic.py
@@ -1,4 +1,9 @@
-from statsmodels.compat.pandas import Appender, is_int_index, to_numpy
+from statsmodels.compat.pandas import (
+ PD_LT_2_2_0,
+ Appender,
+ is_int_index,
+ to_numpy,
+)
from abc import ABC, abstractmethod
import datetime as dt
@@ -742,19 +747,31 @@ class CalendarSeasonality(CalendarDeterministicTerm):
_is_dummy = True
# out_of: freq
- _supported = {
- "W": {"H": 24 * 7, "B": 5, "D": 7, "h": 24 * 7},
- "D": {"H": 24, "h": 24},
- "Q": {"M": 3, "MS": 3},
- "A": {"M": 12, "MS": 12, "Q": 4}
- }
+ if PD_LT_2_2_0:
+ _supported = {
+ "W": {"B": 5, "D": 7, "h": 24 * 7, "H": 24 * 7},
+ "D": {"h": 24, "H": 24},
+ "Q": {"MS": 3, "M": 3},
+ "A": {"MS": 12, "M": 12},
+ "Y": {"MS": 12, "Q": 4, "M": 12},
+ }
+ else:
+ _supported = {
+ "W": {"B": 5, "D": 7, "h": 24 * 7},
+ "D": {"h": 24},
+ "Q": {"MS": 3, "ME": 3},
+ "A": {"MS": 12, "ME": 12, "QE": 4},
+ "Y": {"MS": 12, "ME": 12, "QE": 4},
+ "QE": {"ME": 3},
+ "YE": {"ME": 12, "QE": 4},
+ }
def __init__(self, freq: str, period: str) -> None:
freq_options: Set[str] = set()
freq_options.update(
*[list(val.keys()) for val in self._supported.values()]
)
- period_options = list(self._supported.keys())
+ period_options = tuple(self._supported.keys())
freq = string_like(
freq, "freq", options=tuple(freq_options), lower=False
@@ -784,7 +801,7 @@ def period(self) -> str:
def _weekly_to_loc(
self, index: Union[pd.DatetimeIndex, pd.PeriodIndex]
) -> np.ndarray:
- if self._freq.freqstr == "H":
+ if self._freq.freqstr in ("h", "H"):
return index.hour + 24 * index.dayofweek
elif self._freq.freqstr == "D":
return index.dayofweek
@@ -811,7 +828,7 @@ def _quarterly_to_loc(
def _annual_to_loc(
self, index: Union[pd.DatetimeIndex, pd.PeriodIndex]
) -> np.ndarray:
- if self._freq.freqstr == "M":
+ if self._freq.freqstr in ("M", "ME", "MS"):
return index.month - 1
else: # "Q"
return index.quarter - 1
@@ -823,9 +840,9 @@ def _get_terms(
locs = self._daily_to_loc(index)
elif self._period == "W":
locs = self._weekly_to_loc(index)
- elif self._period == "Q":
+ elif self._period in ("Q", "QE"):
locs = self._quarterly_to_loc(index)
- else: # "A":
+ else: # "A", "Y":
locs = self._annual_to_loc(index)
full_cycle = self._supported[self._period][self._freq_str]
terms = np.zeros((locs.shape[0], full_cycle))
diff --git a/statsmodels/tsa/filters/tests/test_filters.py b/statsmodels/tsa/filters/tests/test_filters.py
index 836efc5afd2..30fafc2b785 100644
--- a/statsmodels/tsa/filters/tests/test_filters.py
+++ b/statsmodels/tsa/filters/tests/test_filters.py
@@ -1,4 +1,8 @@
-from statsmodels.compat.pandas import assert_frame_equal, make_dataframe
+from statsmodels.compat.pandas import (
+ QUARTER_END,
+ assert_frame_equal,
+ make_dataframe,
+)
from datetime import datetime
@@ -566,7 +570,7 @@ def test_cfitz_filter():
def test_bking_pandas():
# 1d
dta = macrodata.load_pandas().data
- index = date_range(start='1959-01-01', end='2009-10-01', freq='Q')
+ index = date_range(start='1959-01-01', end='2009-10-01', freq=QUARTER_END)
dta.index = index
filtered = bkfilter(dta["infl"])
nd_filtered = bkfilter(dta['infl'].values)
@@ -587,7 +591,7 @@ def test_bking_pandas():
def test_cfitz_pandas():
# 1d
dta = macrodata.load_pandas().data
- index = date_range(start='1959-01-01', end='2009-10-01', freq='Q')
+ index = date_range(start='1959-01-01', end='2009-10-01', freq=QUARTER_END)
dta.index = index
cycle, trend = cffilter(dta["infl"])
ndcycle, ndtrend = cffilter(dta['infl'].values)
@@ -607,7 +611,7 @@ def test_cfitz_pandas():
def test_hpfilter_pandas():
dta = macrodata.load_pandas().data
- index = date_range(start='1959-01-01', end='2009-10-01', freq='Q')
+ index = date_range(start='1959-01-01', end='2009-10-01', freq=QUARTER_END)
dta.index = index
cycle, trend = hpfilter(dta["realgdp"])
ndcycle, ndtrend = hpfilter(dta['realgdp'].values)
@@ -626,11 +630,11 @@ def setup_class(cls):
232, 429, 3, 98, 43, -141, -77, -13, 125, 361, -45, 184]
cls.data = DataFrame(data, date_range(start='1/1/1951',
periods=len(data),
- freq='Q'))
+ freq=QUARTER_END))
data[9] = np.nan
cls.datana = DataFrame(data, date_range(start='1/1/1951',
periods=len(data),
- freq='Q'))
+ freq=QUARTER_END))
from .results import filter_results
cls.expected = filter_results
diff --git a/statsmodels/tsa/forecasting/tests/test_stl.py b/statsmodels/tsa/forecasting/tests/test_stl.py
index f673886b5e7..9ad1b0995c7 100644
--- a/statsmodels/tsa/forecasting/tests/test_stl.py
+++ b/statsmodels/tsa/forecasting/tests/test_stl.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import MONTH_END
+
import numpy as np
from numpy.testing import assert_allclose
import pandas as pd
@@ -20,7 +22,7 @@
def data(request):
rs = np.random.RandomState(987654321)
err = rs.standard_normal(500)
- index = pd.date_range("1980-1-1", freq="M", periods=500)
+ index = pd.date_range("1980-1-1", freq=MONTH_END, periods=500)
fourier = Fourier(12, 1)
terms = fourier.in_sample(index)
det = np.squeeze(np.asarray(terms @ np.array([[2], [1]])))
diff --git a/statsmodels/tsa/holtwinters/tests/test_holtwinters.py b/statsmodels/tsa/holtwinters/tests/test_holtwinters.py
index f9a1e8cbba8..6521eac13f9 100644
--- a/statsmodels/tsa/holtwinters/tests/test_holtwinters.py
+++ b/statsmodels/tsa/holtwinters/tests/test_holtwinters.py
@@ -2,6 +2,7 @@
Author: Terence L van Zyl
Modified: Kevin Sheppard
"""
+from statsmodels.compat.pandas import MONTH_END
from statsmodels.compat.pytest import pytest_warns
import os
@@ -108,7 +109,7 @@ def ses():
for i in range(1, 1200):
y[i] = y[i - 1] + e[i] - 0.2 * e[i - 1]
y = y[200:]
- index = pd.date_range("2000-1-1", periods=y.shape[0], freq="M")
+ index = pd.date_range("2000-1-1", periods=y.shape[0], freq=MONTH_END)
return pd.Series(y, index=index, name="y")
@@ -1995,7 +1996,7 @@ def test_forecast_index_types(ses, index_typ):
if index_typ == "period":
index = pd.period_range("2000-1-1", periods=nobs + 36, freq="M")
elif index_typ == "date_range":
- index = pd.date_range("2000-1-1", periods=nobs + 36, freq="M")
+ index = pd.date_range("2000-1-1", periods=nobs + 36, freq=MONTH_END)
elif index_typ == "range":
index = pd.RangeIndex(nobs + 36)
kwargs["seasonal_periods"] = 12
diff --git a/statsmodels/tsa/regime_switching/_hamilton_filter.pyx.in b/statsmodels/tsa/regime_switching/_hamilton_filter.pyx.in
index 2528eca4ce1..6cba3be6880 100644
--- a/statsmodels/tsa/regime_switching/_hamilton_filter.pyx.in
+++ b/statsmodels/tsa/regime_switching/_hamilton_filter.pyx.in
@@ -114,7 +114,7 @@ cdef void {{prefix}}hamilton_filter_log_iteration(int t, int k_regimes, int orde
{{cython_type}} [:] curr_predicted_joint_probabilities,
{{cython_type}} [:] prev_filtered_joint_probabilities,
{{cython_type}} [:] curr_filtered_joint_probabilities,
- {{cython_type}} [:] tmp_predicted_joint_probabilities) nogil:
+ {{cython_type}} [:] tmp_predicted_joint_probabilities) noexcept nogil:
cdef int i, j, k, ix
cdef:
int k_regimes_order_m1 = k_regimes**(order - 1)
diff --git a/statsmodels/tsa/regime_switching/_kim_smoother.pyx.in b/statsmodels/tsa/regime_switching/_kim_smoother.pyx.in
index d207a627da8..d42dc3377b8 100644
--- a/statsmodels/tsa/regime_switching/_kim_smoother.pyx.in
+++ b/statsmodels/tsa/regime_switching/_kim_smoother.pyx.in
@@ -85,7 +85,7 @@ cdef void {{prefix}}kim_smoother_log_iteration(int tt, int k_regimes, int order,
{{cython_type}} [:] predicted_joint_probabilities,
{{cython_type}} [:] filtered_joint_probabilities,
{{cython_type}} [:] prev_smoothed_joint_probabilities,
- {{cython_type}} [:] next_smoothed_joint_probabilities) nogil:
+ {{cython_type}} [:] next_smoothed_joint_probabilities) noexcept nogil:
cdef int t, i, j, k, ix
cdef:
int k_regimes_order_m1 = k_regimes**(order - 1)
diff --git a/statsmodels/tsa/statespace/_tools.pyx.in b/statsmodels/tsa/statespace/_tools.pyx.in
index 1dd17d44942..8947329168f 100644
--- a/statsmodels/tsa/statespace/_tools.pyx.in
+++ b/statsmodels/tsa/statespace/_tools.pyx.in
@@ -1189,7 +1189,7 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
{{cython_type}} scale,
int compute_prior_weights=False):
cdef:
- int t, i, j, n_t, n_j, min_j, max_j, p_t, p_j
+ int t, i, j, n_t, n_j, min_j, max_j, p_t, p_j, t0
int n = model.nobs
int p = model.k_endog
int m = model.k_states
@@ -1231,26 +1231,31 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
t = compute_t[i]
store_t[t] = True
+ # Determine the first index that we need to compute
+ t0 = min(min_j, compute_t[0])
+ # Let double-letters denote indexing that starts from t0
+ nn = n - t0
+
# Flags
filter_collapsed = kfilter.filter_method & 0x20
# Create required storage and temporary matrices
- # Hi_W = np.zeros((p, m, n), order='F')
- # K = np.zeros((p, m, n), order='F')
- # weights = np.zeros((m, p, n, n), order='F') * np.nan
- # state_intercept_weights = np.zeros((m, m, n, n), order='F') * np.nan
+ # Hi_W = np.zeros((p, m, nn), order='F')
+ # K = np.zeros((p, m, nn), order='F')
+ # weights = np.zeros((m, p, nn, nn), order='F') * np.nan
+ # state_intercept_weights = np.zeros((m, m, nn, nn), order='F') * np.nan
# tmp = np.zeros((m, m), order='F')
# F = np.zeros((p, p), order='F')
# Fi = np.zeros((p, p), order='F')
# FiZ = np.zeros((p, m), order='F')
- dim4[0] = m; dim4[1] = p; dim4[2] = n; dim4[3] = n;
+ dim4[0] = m; dim4[1] = p; dim4[2] = nn; dim4[3] = nn;
weights = np.PyArray_ZEROS(4, dim4, {{typenum}}, FORTRAN) * np.nan
- dim4[0] = m; dim4[1] = m; dim4[2] = n; dim4[3] = n;
+ dim4[0] = m; dim4[1] = m; dim4[2] = nn; dim4[3] = nn;
state_intercept_weights = np.PyArray_ZEROS(4, dim4, {{typenum}}, FORTRAN) * np.nan
- dim3[0] = p; dim3[1] = m; dim3[2] = n;
+ dim3[0] = p; dim3[1] = m; dim3[2] = nn;
Hi_W = np.PyArray_ZEROS(3, dim3, {{typenum}}, FORTRAN)
K = np.PyArray_ZEROS(3, dim3, {{typenum}}, FORTRAN)
- dim3[0] = m; dim3[1] = m; dim3[2] = n;
+ dim3[0] = m; dim3[1] = m; dim3[2] = nn;
# Note: the order here is (m, l, t), where the `l` dimension refers to the
# element of the initial state vector, and the `m, t` dimensions refer to
# the element / time point of the smoothed state vector.
@@ -1264,7 +1269,12 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
FiZ = np.PyArray_ZEROS(2, dim2, {{typenum}}, FORTRAN)
# First pass to compute required quantities, if necessary
- for t in range(n):
+ for t in range(t0, n):
+ # Recall that double-letters denotes indexes that start from t0
+ # Hi_W[:, :, nn], K[:, :, nn], weights[:, :, nn, nn],
+ # state_intercept_weights[:, :, nn, nn], prior_weights[:, :, nn]
+ tt = t - t0
+
p_t = p - model.nmissing[t]
# Handling of exact diffuse periods is not currently available
@@ -1273,9 +1283,9 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
if (t < kfilter.nobs_diffuse
or (not store_t[t] and (t < min_j or t > max_j))):
# No need to set this when using the dictionary
- # K[t] = np.zeros((m, p_t)) * np.nan
- # Hi_W[t] is already initialized to NaNs, so can skip:
- # Hi_W[t, :] = np.nan
+ # K[tt] = np.zeros((m, p_t)) * np.nan
+ # Hi_W[tt] is already initialized to NaNs, so can skip:
+ # Hi_W[tt, :] = np.nan
continue
# Move the statespace representation to the time period, but do not
@@ -1286,8 +1296,8 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
if p_t == 0:
# No need to set this when using the dictionary
# K.append(np.zeros((m, 0)))
- # Hi_W[t] is already initialized to NaNs, so can skip:
- # Hi_W[t, :] = np.nan
+ # Hi_W[tt] is already initialized to NaNs, so can skip:
+ # Hi_W[tt, :] = np.nan
continue
# Get filtered objects
@@ -1329,10 +1339,10 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
blas.{{prefix}}gemm("N", "N", &model._k_states, &model._k_endog, &model._k_states,
&alpha, model._transition, &kfilter.k_states,
&tmp[0, 0], &kfilter.k_states,
- &beta, &K[0, 0, t], &kfilter.k_states)
+ &beta, &K[0, 0, tt], &kfilter.k_states)
_FiZ = &FiZ[0, 0]
- _K = &K[0, 0, t]
+ _K = &K[0, 0, tt]
else:
_FiZ = &kfilter.tmp3[0, 0, t]
_K = &kfilter.kalman_gain[0, 0, t]
@@ -1340,7 +1350,7 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
# Compute required quantites
# W_j = H_j (F_j^{-1} Z_j - K_j' N_j L_j)
# => H_j^{-1} W_j = F_j^{-1} Z_j - K_j' N_j L_j
- blas.{{prefix}}copy(&pm, _FiZ, &inc, &Hi_W[0, 0, t], &inc)
+ blas.{{prefix}}copy(&pm, _FiZ, &inc, &Hi_W[0, 0, tt], &inc)
scalar = 1. / scale
blas.{{prefix}}gemm("N", "N", &model._k_states, &model._k_states, &model._k_states,
@@ -1351,11 +1361,16 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
blas.{{prefix}}gemm("T", "N", &model._k_endog, &model._k_states, &model._k_states,
&gamma, _K, &kfilter.k_states,
kfilter._tmp0, &kfilter.k_states,
- &scalar, &Hi_W[0, 0, t], &model.k_endog)
+ &scalar, &Hi_W[0, 0, tt], &model.k_endog)
# Second pass to compute weights
# for t in compute_t:
for t in range(n):
+ # Recall that double-letters denotes indexes that start from t0
+ # Hi_W[:, :, nn], K[:, :, nn], weights[:, :, nn, nn],
+ # state_intercept_weights[:, :, nn, nn], prior_weights[:, :, nn]
+ tt = t - t0
+
if t < kfilter.nobs_diffuse or not store_t[t]:
continue
@@ -1374,9 +1389,13 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
# This is the weight of the prior on the smoothed state at
# time t == 0
if t == 0 and compute_prior_weights:
- blas.{{prefix}}copy(&kfilter.k_states2, kfilter._tmp0, &inc, &prior_weights[0, 0, t], &inc)
+ blas.{{prefix}}copy(&kfilter.k_states2, kfilter._tmp0, &inc, &prior_weights[0, 0, tt], &inc)
for j in range(t - 1, min_j - 1, -1):
+ # Again, recall that double-letters denotes indexes that start
+ # from t0
+ jj = j - t0
+
# Since we are not handling any diffuse observations now, we can
# stop as soon as we get to the diffuse periods
if j < kfilter.nobs_diffuse:
@@ -1386,7 +1405,7 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
compute_K = kfilter.univariate_filter[t] or filter_collapsed
if compute_K:
- _K = &K[0, 0, j]
+ _K = &K[0, 0, jj]
else:
_K = &kfilter.kalman_gain[0, 0, j]
@@ -1399,15 +1418,15 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
# L[j] = \prod_i L_{j, i}
if store_j[j]:
if p_j > 0:
- # weights[:, :p_j, t, j] = tmp @ K[j][:, :p_j]
+ # weights[:, :p_j, tt, jj] = tmp @ K[jj][:, :p_j]
blas.{{prefix}}gemm("N", "N", &model._k_states, &p_j, &model._k_states,
&alpha, kfilter._tmp0, &kfilter.k_states,
_K, &kfilter.k_states,
- &beta, &weights[0, 0, t, j], &kfilter.k_states)
+ &beta, &weights[0, 0, tt, jj], &kfilter.k_states)
- # state_intercept_weights[:, :, t, j] = -tmp
+ # state_intercept_weights[:, :, tt, jj] = -tmp
blas.{{prefix}}copy(&kfilter.k_states2, kfilter._tmp0, &inc,
- &state_intercept_weights[0, 0, t, j], &inc)
+ &state_intercept_weights[0, 0, tt, jj], &inc)
if j > min_j or (j == 0 and compute_prior_weights):
# tmp = tmp @ L
@@ -1421,7 +1440,7 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
# This is the weight of the prior on the smoothed state at
# time t > 0
if j == 0 and compute_prior_weights:
- blas.{{prefix}}copy(&kfilter.k_states2, kfilter._tmp0, &inc, &prior_weights[0, 0, t], &inc)
+ blas.{{prefix}}copy(&kfilter.k_states2, kfilter._tmp0, &inc, &prior_weights[0, 0, tt], &inc)
# For j == t
# For the univariate case, some notes:
@@ -1434,16 +1453,17 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
# incorporated all the information from time t. Then j = t but i < p_t
# would follow the j < t formula.
j = t
+ jj = tt
p_j = p - model.nmissing[t]
if store_j[j]:
if p_j > 0:
- # weights[:, :p_j, t, j] = Pt @ Hi_W[j, :p_j].T
+ # weights[:, :p_j, tt, jj] = Pt @ Hi_W[j, :p_j].T
blas.{{prefix}}gemm("N", "T", &model._k_states, &p_j, &model._k_states,
&scale, &kfilter.predicted_state_cov[0, 0, t], &kfilter.k_states,
- &Hi_W[0, 0, j], &model.k_endog,
- &beta, &weights[0, 0, t, j], &kfilter.k_states)
+ &Hi_W[0, 0, jj], &model.k_endog,
+ &beta, &weights[0, 0, tt, jj], &kfilter.k_states)
- # state_intercept_weights[:, :, t, j] = -(Pt @ Lt' @ Nt)
+ # state_intercept_weights[:, :, tt, jj] = -(Pt @ Lt' @ Nt)
# Note: normally would need to multiply Pt by scale and divide Nt
# by `scale`, but since they're being multiplied here, the scale terms
# cancel out.
@@ -1454,7 +1474,7 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
blas.{{prefix}}gemm("N", "N", &model._k_states, &model._k_states, &model._k_states,
&alpha, &kfilter.predicted_state_cov[0, 0, t], &kfilter.k_states,
kfilter._tmp00, &kfilter.k_states,
- &beta, &state_intercept_weights[0, 0, t, j], &kfilter.k_states)
+ &beta, &state_intercept_weights[0, 0, tt, jj], &kfilter.k_states)
# Forwards from j = t+1, t+2, ..., max_j
# tmp = Pt
@@ -1471,14 +1491,17 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
kfilter._tmp0, &inc)
for j in range(t + 1, max_j + 1):
+ # Again, recall that double-letters denotes indexes that start
+ # from t0
+ jj = j - t0
p_j = p - model.nmissing[j]
if store_j[j] and p_j > 0:
- # weights[:, :p_j, t, j] = tmp @ Hi_W[j, :p_j].T
+ # weights[:, :p_j, tt, jj] = tmp @ Hi_W[j, :p_j].T
blas.{{prefix}}gemm("N", "T", &model._k_states, &p_j, &model._k_states,
&alpha, kfilter._tmp0, &kfilter.k_states,
- &Hi_W[0, 0, j], &p,
- &beta, &weights[0, 0, t, j], &kfilter.k_states)
+ &Hi_W[0, 0, jj], &p,
+ &beta, &weights[0, 0, tt, jj], &kfilter.k_states)
# tmp = tmp @ L[j].T
blas.{{prefix}}gemm("N", "T", &model._k_states, &model._k_states, &model._k_states,
@@ -1489,12 +1512,12 @@ cpdef _{{prefix}}compute_smoothed_state_weights(
kfilter._tmp0, &inc)
if store_j[j]:
- # state_intercept_weights[:, :, t, j] = -(tmp @ N_j)
+ # state_intercept_weights[:, :, tt, jj] = -(tmp @ N_j)
scalar = -1 / scale
blas.{{prefix}}gemm("N", "N", &model._k_states, &model._k_states, &model._k_states,
&scalar, kfilter._tmp00, &kfilter.k_states,
&smoother.scaled_smoothed_estimator_cov[0, 0, j + 1], &kfilter.k_states,
- &beta, &state_intercept_weights[0, 0, t, j], &kfilter.k_states)
+ &beta, &state_intercept_weights[0, 0, tt, jj], &kfilter.k_states)
return np.array(weights), np.array(state_intercept_weights), np.array(prior_weights), np.array(Hi_W)
diff --git a/statsmodels/tsa/statespace/dynamic_factor_mq.py b/statsmodels/tsa/statespace/dynamic_factor_mq.py
index ad03001fe89..1a58e0a0152 100644
--- a/statsmodels/tsa/statespace/dynamic_factor_mq.py
+++ b/statsmodels/tsa/statespace/dynamic_factor_mq.py
@@ -3193,8 +3193,9 @@ def simulate(self, params, nsimulations, measurement_shocks=None,
if use_pandas:
# pd.Series (k_endog=1, replications=None)
if len(shape) == 1:
- sim *= self._endog_std.iloc[0]
- sim += self._endog_mean.iloc[0]
+ std = self._endog_std.iloc[0]
+ mean = self._endog_mean.iloc[0]
+ sim = sim * std + mean
# pd.DataFrame (k_endog > 1, replications=None)
# [or]
# pd.DataFrame with MultiIndex (replications > 0)
@@ -3672,9 +3673,9 @@ def get_prediction(self, start=None, end=None, dynamic=False,
def news(self, comparison, impact_date=None, impacted_variable=None,
start=None, end=None, periods=None, exog=None,
- comparison_type=None, state_index=None, return_raw=False,
- tolerance=1e-10, endog_quarterly=None, original_scale=True,
- **kwargs):
+ comparison_type=None, revisions_details_start=False,
+ state_index=None, return_raw=False, tolerance=1e-10,
+ endog_quarterly=None, original_scale=True, **kwargs):
"""
Compute impacts from updated data (news and revisions).
@@ -3757,6 +3758,7 @@ def news(self, comparison, impact_date=None, impacted_variable=None,
comparison, impact_date=impact_date,
impacted_variable=impacted_variable, start=start, end=end,
periods=periods, exog=exog, comparison_type=comparison_type,
+ revisions_details_start=revisions_details_start,
state_index=state_index, return_raw=return_raw,
tolerance=tolerance, endog_quarterly=endog_quarterly, **kwargs)
@@ -3776,11 +3778,18 @@ def news(self, comparison, impact_date=None, impacted_variable=None,
if news_results.revision_impacts is not None:
news_results.revision_impacts = (
news_results.revision_impacts * endog_std)
+ if news_results.revision_detailed_impacts is not None:
+ news_results.revision_detailed_impacts = (
+ news_results.revision_detailed_impacts * endog_std)
+ if news_results.revision_grouped_impacts is not None:
+ news_results.revision_grouped_impacts = (
+ news_results.revision_grouped_impacts * endog_std)
# Update forecasts
for name in ['prev_impacted_forecasts', 'news', 'revisions',
'update_realized', 'update_forecasts',
- 'revised', 'revised_prev', 'post_impacted_forecasts']:
+ 'revised', 'revised_prev', 'post_impacted_forecasts',
+ 'revisions_all', 'revised_all', 'revised_prev_all']:
dta = getattr(news_results, name)
# for pd.Series, dta.multiply(...) and (sometimes) dta.add(...)
diff --git a/statsmodels/tsa/statespace/kalman_smoother.py b/statsmodels/tsa/statespace/kalman_smoother.py
index 253a02cef9d..134662d2af1 100644
--- a/statsmodels/tsa/statespace/kalman_smoother.py
+++ b/statsmodels/tsa/statespace/kalman_smoother.py
@@ -1057,7 +1057,7 @@ def smoothed_state_autocovariance(self, lag=1, t=None, start=None,
return acov
def news(self, previous, t=None, start=None, end=None,
- revised=None, design=None, state_index=None):
+ revisions_details_start=True, design=None, state_index=None):
r"""
Compute the news and impacts associated with a data release
@@ -1080,6 +1080,15 @@ def news(self, previous, t=None, start=None, end=None,
since it is an exclusive endpoint, the returned news do not include
the value at this index. Cannot be used in combination with the `t`
argument.
+ revisions_details_start : bool or int, optional
+ The period at which to beging computing the detailed impacts of
+ data revisions. Any revisions prior to this period will have their
+ impacts grouped together. If a negative integer, interpreted as
+ an offset from the end of the dataset. If set to True, detailed
+ impacts are computed for all revisions, while if set to False, all
+ revisions are grouped together. Default is False. Note that for
+ large models, setting this to be near the beginning of the sample
+ can cause this function to be slow.
design : array, optional
Design matrix for the period `t` in time-varying models. If this
model has a time-varying design matrix, and the argument `t` is out
@@ -1102,8 +1111,8 @@ def news(self, previous, t=None, start=None, end=None,
the news. It is equivalent to E[y^i | post] - E[y^i | revision],
where y^i are the variables of interest. In [1]_, this is
described as "revision" in equation (17).
- - `revision_impacts`: update to forecasts of variables impacted
- variables from data revisions. It is
+ - `revision_detailed_impacts`: update to forecasts of variables
+ impacted variables from data revisions. It is
E[y^i | revision] - E[y^i | previous], and does not have a
specific notation in [1]_, since there for simplicity they assume
that there are no revisions.
@@ -1112,18 +1121,26 @@ def news(self, previous, t=None, start=None, end=None,
were newly incorporated in a data release (but not including
revisions to data points that already existed in the previous
release). In [1]_, this is described as "news" in equation (17).
- - `revisions`
+ - `revisions`: y^r(updated) - y^r(previous) for periods in
+ which detailed impacts were computed
+ - `revisions_all` : y^r(updated) - y^r(previous) for all revisions
- `gain`: the gain matrix associated with the "Kalman-like" update
from the news, E[y I'] E[I I']^{-1}. In [1]_, this can be found
in the equation For E[y_{k,t_k} \mid I_{v+1}] in the middle of
page 17.
- - `revision_weights`
+ - `revision_weights` weights on observations for the smoothed
+ signal
- `update_forecasts`: forecasts of the updated periods used to
construct the news, E[y^u | previous].
- `update_realized`: realizations of the updated periods used to
construct the news, y^u.
- - `revised_prev`
- - `revised`
+ - `revised`: revised observations of the periods that were revised
+ and for which detailed impacts were computed
+ - `revised`: revised observations of the periods that were revised
+ - `revised_prev`: previous observations of the periods that were
+ revised and for which detailed impacts were computed
+ - `revised_prev_all`: previous observations of the periods that
+ were revised and for which detailed impacts were computed
- `prev_impacted_forecasts`: previous forecast of the periods of
interest, E[y^i | previous].
- `post_impacted_forecasts`: forecast of the periods of interest
@@ -1131,8 +1148,18 @@ def news(self, previous, t=None, start=None, end=None,
E[y^i | post].
- `revision_results`: results object that updates the `previous`
results to take into account data revisions.
+ - `revision_results`: results object associated with the revisions
+ - `revision_impacts`: total impacts from all revisions (both
+ grouped and detailed)
- `revisions_ix`: list of `(t, i)` positions of revisions in endog
+ - `revisions_details`: list of `(t, i)` positions of revisions to
+ endog for which details of impacts were computed
+ - `revisions_grouped`: list of `(t, i)` positions of revisions to
+ endog for which impacts were grouped
+ - `revisions_details_start`: period in which revision details start
+ to be computed
- `updates_ix`: list of `(t, i)` positions of updates to endog
+ - `state_index`: index of state variables used to compute impacts
Notes
-----
@@ -1233,21 +1260,67 @@ def news(self, previous, t=None, start=None, end=None,
post_impacted_forecasts = self.predict(
start=start, end=end).smoothed_forecasts
- # Get revision weights, impacts, and forecasts
+ # Separate revisions into those with detailed impacts and those where
+ # impacts are grouped together
+ if revisions_details_start is True:
+ revisions_details_start = 0
+ elif revisions_details_start is False:
+ revisions_details_start = previous.nobs
+ elif revisions_details_start < 0:
+ revisions_details_start = previous.nobs + revisions_details_start
+
+ revisions_grouped = []
+ revisions_details = []
+ if revisions_details_start > 0:
+ for s, i in revisions_ix:
+ if s < revisions_details_start:
+ revisions_grouped.append((s, i))
+ else:
+ revisions_details.append((s, i))
+ else:
+ revisions_details = revisions_ix
+
+ # Practically, don't compute impacts of revisions prior to first
+ # point that was actually revised
if len(revisions_ix) > 0:
+ revisions_details_start = max(revisions_ix[0][0],
+ revisions_details_start)
+
+ # Setup default (empty) output for revisions
+ revised_endog = None
+ revised_all = None
+ revised_prev_all = None
+ revisions_all = None
+
+ revised = None
+ revised_prev = None
+ revisions = None
+ revision_weights = None
+ revision_detailed_impacts = None
+ revision_results = None
+ revision_impacts = None
+
+ # Get revisions datapoints for all revisions (regardless of whether
+ # or not we are computing detailed impacts)
+ if len(revisions_ix) > 0:
+ # Indexes
+ revised_j, revised_p = zip(*revisions_ix)
+ compute_j = np.arange(revised_j[0], revised_j[-1] + 1)
+
+ # Data from updated model
revised_endog = self.endog[:, :previous.nobs].copy()
+ # ("revisions" are points where data was previously published and
+ # then changed, so we need to ignore "updates", which are points
+ # that were not previously published)
revised_endog[previous.missing.astype(bool)] = np.nan
+ # subset to revision periods
+ revised_all = revised_endog.T[compute_j]
- # Compute the revisions
- revised_j, revised_p = zip(*revisions_ix)
- compute_j = np.arange(revised_j[0], revised_j[-1] + 1)
- revised_prev = previous.endog.T[compute_j]
- revised = revised_endog.T[compute_j]
- revisions = (revised - revised_prev)
+ # Data from original model
+ revised_prev_all = previous.endog.T[compute_j]
- # Compute the weights of the smoothed state vector
- compute_t = np.arange(start, end)
- ix = np.ix_(compute_t, compute_j)
+ # revision = updated - original
+ revisions_all = (revised_all - revised_prev_all)
# Construct a model from which we can create weights for impacts
# through `end`
@@ -1274,74 +1347,111 @@ def news(self, previous, t=None, start=None, end=None,
rev_mod.initialize(init)
revision_results = rev_mod.smooth()
- smoothed_state_weights, _, _ = (
- tools._compute_smoothed_state_weights(
- rev_mod, compute_t=compute_t, compute_j=compute_j,
- compute_prior_weights=False, scale=previous.scale))
- smoothed_state_weights = smoothed_state_weights[ix]
-
- # Convert the weights in terms of smoothed forecasts
- # t, j, m, p, i
- ZT = rev_mod.design.T
- if ZT.shape[0] > 1:
- ZT = ZT[compute_t]
-
- # Subset the states used for the impacts if applicable
- if state_index is not None:
- ZT = ZT[:, state_index, :]
- smoothed_state_weights = (
- smoothed_state_weights[:, :, state_index])
+ # Get detailed revision weights, impacts, and forecasts
+ if len(revisions_details) > 0:
+ # Indexes for the subset of revisions for which we are
+ # computing detailed impacts
+ compute_j = np.arange(revisions_details_start,
+ revised_j[-1] + 1)
+ # Offset describing revisions for which we are not computing
+ # detailed impacts
+ offset = revisions_details_start - revised_j[0]
+ revised = revised_all[offset:]
+ revised_prev = revised_prev_all[offset:]
+ revisions = revisions_all[offset:]
+
+ # Compute the weights of the smoothed state vector
+ compute_t = np.arange(start, end)
+
+ smoothed_state_weights, _, _ = (
+ tools._compute_smoothed_state_weights(
+ rev_mod, compute_t=compute_t, compute_j=compute_j,
+ compute_prior_weights=False, scale=previous.scale))
+
+ # Convert the weights in terms of smoothed forecasts
+ # t, j, m, p, i
+ ZT = rev_mod.design.T
+ if ZT.shape[0] > 1:
+ ZT = ZT[compute_t]
+
+ # Subset the states used for the impacts if applicable
+ if state_index is not None:
+ ZT = ZT[:, state_index, :]
+ smoothed_state_weights = (
+ smoothed_state_weights[:, :, state_index])
+
+ # Multiplication gives: t, j, m, p * t, j, m, p, k
+ # Sum along axis=2 gives: t, j, p, k
+ # Transpose to: t, j, k, p (i.e. like t, j, m, p but with k
+ # instead of m)
+ revision_weights = np.nansum(
+ smoothed_state_weights[..., None]
+ * ZT[:, None, :, None, :], axis=2).transpose(0, 1, 3, 2)
+
+ # Multiplication gives: t, j, k, p * t, j, k, p
+ # Sum along axes 1, 3 gives: t, k
+ # This is also a valid way to compute impacts, but it employs
+ # unnecessary multiplications with zeros; it is better to use
+ # the below method that flattens the revision indices before
+ # computing the impacts
+ # revision_detailed_impacts = np.nansum(
+ # revision_weights * revisions[None, :, None, :],
+ # axis=(1, 3))
+
+ # Flatten the weights and revisions along the revised j, k
+ # dimensions so that we only retain the actual revision
+ # elements
+ revised_j, revised_p = zip(*[
+ s for s in revisions_ix
+ if s[0] >= revisions_details_start])
+ ix_j = revised_j - revised_j[0]
+ # Shape is: t, k, j * p
+ # Note: have to transpose first so that the two advanced
+ # indexes are next to each other, so that "the dimensions from
+ # the advanced indexing operations are inserted into the result
+ # array at the same spot as they were in the initial array"
+ # (see https://numpy.org/doc/stable/user/basics.indexing.html,
+ # "Combining advanced and basic indexing")
+ revision_weights = (
+ revision_weights.transpose(0, 2, 1, 3)[:, :,
+ ix_j, revised_p])
+ # Shape is j * k
+ revisions = revisions[ix_j, revised_p]
+ # Shape is t, k
+ revision_detailed_impacts = revision_weights @ revisions
+
+ # Similarly, flatten the revised and revised_prev series
+ revised = revised[ix_j, revised_p]
+ revised_prev = revised_prev[ix_j, revised_p]
+
+ # Squeeze if `t` argument used
+ if t is not None:
+ revision_weights = revision_weights[0]
+ revision_detailed_impacts = revision_detailed_impacts[0]
+
+ # Get total revision impacts
+ revised_impact_forecasts = (
+ revision_results.smoothed_forecasts[..., start:end])
+ if end > revision_results.nobs:
+ predict_start = max(start, revision_results.nobs)
+ p = revision_results.predict(
+ start=predict_start, end=end, **extend_kwargs)
+ revised_impact_forecasts = np.concatenate(
+ (revised_impact_forecasts, p.forecasts), axis=1)
+
+ revision_impacts = (revised_impact_forecasts -
+ prev_impacted_forecasts).T
+ if t is not None:
+ revision_impacts = revision_impacts[0]
- # Multiplication gives: t, j, m, p * t, j, m, p, k
- # Sum along axis=2 gives: t, j, p, k
- # Transpose to: t, j, k, p (i.e. like t, j, m, p but with k instead
- # of m)
- revision_weights = np.nansum(
- smoothed_state_weights[..., None]
- * ZT[:, None, :, None, :], axis=2).transpose(0, 1, 3, 2)
-
- # Multiplication gives: t, j, k, p * t, j, k, p
- # Sum along axes 1, 3 gives: t, k
- # This is also a valid way to compute impacts, but it employes
- # unnecessary multiplications with zeros; it is better to use the
- # below method that flattens the revision indices before computing
- # the impacts
- # revision_impacts = np.nansum(
- # revision_weights * revisions[None, :, None, :], axis=(1, 3))
-
- # Flatten the weights and revisions along the revised j, k
- # dimensions so that we only retain the actual revision elements
+ # Need to also flatten the revisions items that contain all revisions
+ if len(revisions_ix) > 0:
+ revised_j, revised_p = zip(*revisions_ix)
ix_j = revised_j - revised_j[0]
- # Shape is: t, k, j * p
- # Note: have to transpose first so that the two advanced indexes
- # are next to each other, so that "the dimensions from the
- # advanced indexing operations are inserted into the result
- # array at the same spot as they were in the initial array"
- # (see https://numpy.org/doc/stable/user/basics.indexing.html,
- # "Combining advanced and basic indexing")
- revision_weights = (
- revision_weights.transpose(0, 2, 1, 3)[:, :, ix_j, revised_p])
- # Shape is j * k
- revisions = revisions[ix_j, revised_p]
- # Shape is t, k
- revision_impacts = revision_weights @ revisions
-
- # Similarly, flatten the revised and revised_prev series
- revised = revised[ix_j, revised_p]
- revised_prev = revised_prev[ix_j, revised_p]
- # Squeeze if `t` argument used
- if t is not None:
- revision_weights = revision_weights[0]
- revision_impacts = revision_impacts[0]
- else:
- revised_endog = None
- revised = None
- revised_prev = None
- revisions = None
- revision_weights = None
- revision_impacts = None
- revision_results = None
+ revisions_all = revisions_all[ix_j, revised_p]
+ revised_all = revised_all[ix_j, revised_p]
+ revised_prev_all = revised_prev_all[ix_j, revised_p]
# Now handle updates
if len(updates_ix) > 0:
@@ -1417,11 +1527,14 @@ def news(self, previous, t=None, start=None, end=None,
update_impacts=update_impacts,
# update to forecast of variables of interest from revisions
# = E[y^i | revision] - E[y^i | previous]
- revision_impacts=revision_impacts,
+ revision_detailed_impacts=revision_detailed_impacts,
# news = A = y^u - E[y^u | previous]
news=update_forecasts_error,
- # revivions y^r(updated) - y^r(previous)
+ # revivions y^r(updated) - y^r(previous) for periods in which
+ # detailed impacts were computed
revisions=revisions,
+ # revivions y^r(updated) - y^r(previous)
+ revisions_all=revisions_all,
# gain matrix = E[y A'] E[A A']^{-1}
gain=obs_gain,
# weights on observations for the smoothed signal
@@ -1432,20 +1545,38 @@ def news(self, previous, t=None, start=None, end=None,
# realizations of the updated periods used to construct the news
# = y^u
update_realized=update_realized,
- # revised observations of the periods that were revised
+ # revised observations of the periods that were revised and for
+ # which detailed impacts were computed
# = y^r_{revised}
revised=revised,
- # previous observations of the periods that were revised
+ # revised observations of the periods that were revised
+ # = y^r_{revised}
+ revised_all=revised_all,
+ # previous observations of the periods that were revised and for
+ # which detailed impacts were computed
# = y^r_{previous}
revised_prev=revised_prev,
+ # previous observations of the periods that were revised
+ # = y^r_{previous}
+ revised_prev_all=revised_prev_all,
# previous forecast of the periods of interest, E[y^i | previous]
prev_impacted_forecasts=prev_impacted_forecasts,
# post. forecast of the periods of interest, E[y^i | post]
post_impacted_forecasts=post_impacted_forecasts,
# results object associated with the revision
revision_results=revision_results,
+ # total impacts from all revisions (both grouped and detailed)
+ revision_impacts=revision_impacts,
# list of (x, y) positions of revisions to endog
revisions_ix=revisions_ix,
+ # list of (x, y) positions of revisions to endog for which details
+ # of impacts were computed
+ revisions_details=revisions_details,
+ # list of (x, y) positions of revisions to endog for which impacts
+ # were grouped
+ revisions_grouped=revisions_grouped,
+ # period in which revision details start to be computed
+ revisions_details_start=revisions_details_start,
# list of (x, y) positions of updates to endog
updates_ix=updates_ix,
# index of state variables used to compute impacts
diff --git a/statsmodels/tsa/statespace/mlemodel.py b/statsmodels/tsa/statespace/mlemodel.py
index cb4f1defaae..3910a55339a 100644
--- a/statsmodels/tsa/statespace/mlemodel.py
+++ b/statsmodels/tsa/statespace/mlemodel.py
@@ -3779,11 +3779,11 @@ def _apply(self, mod, refit=False, fit_kwargs=None):
fit_kwargs['cov_kwds'] = {
'custom_cov_type': self.cov_type,
'custom_cov_params': self.cov_params_default,
- 'custom_description': ('Parameters and standard errors'
- ' were estimated using a different'
- ' dataset and were then applied to'
- ' this dataset. %s'
- % self.cov_kwds['description'])}
+ 'custom_description': (
+ 'Parameters and standard errors were estimated using a'
+ ' different dataset and were then applied to this'
+ ' dataset. %s'
+ % self.cov_kwds.get('description', 'Unknown.'))}
if self.smoother_results is not None:
func = mod.smooth
@@ -3869,34 +3869,42 @@ def _get_previous_updated(self, comparison, exog=None,
return previous, updated, comparison_dataset
def _news_previous_results(self, previous, start, end, periods,
+ revisions_details_start=False,
state_index=None):
# Compute the news
- out = self.smoother_results.news(previous.smoother_results,
- start=start, end=end,
- state_index=state_index)
+ out = self.smoother_results.news(
+ previous.smoother_results, start=start, end=end,
+ revisions_details_start=revisions_details_start,
+ state_index=state_index)
return out
def _news_updated_results(self, updated, start, end, periods,
- state_index=None):
- return updated._news_previous_results(self, start, end, periods,
- state_index=state_index)
+ revisions_details_start=False, state_index=None):
+ return updated._news_previous_results(
+ self, start, end, periods,
+ revisions_details_start=revisions_details_start,
+ state_index=state_index)
def _news_previous_data(self, endog, start, end, periods, exog,
- state_index=None):
+ revisions_details_start=False, state_index=None):
previous = self.apply(endog, exog=exog, copy_initialization=True)
- return self._news_previous_results(previous, start, end, periods,
- state_index=state_index)
+ return self._news_previous_results(
+ previous, start, end, periods,
+ revisions_details_start=revisions_details_start,
+ state_index=state_index)
def _news_updated_data(self, endog, start, end, periods, exog,
- state_index=None):
+ revisions_details_start=False, state_index=None):
updated = self.apply(endog, exog=exog, copy_initialization=True)
- return self._news_updated_results(updated, start, end, periods,
- state_index=state_index)
+ return self._news_updated_results(
+ updated, start, end, periods,
+ revisions_details_start=revisions_details_start,
+ state_index=state_index)
def news(self, comparison, impact_date=None, impacted_variable=None,
start=None, end=None, periods=None, exog=None,
- comparison_type=None, state_index=None, return_raw=False,
- tolerance=1e-10, **kwargs):
+ comparison_type=None, revisions_details_start=False,
+ state_index=None, return_raw=False, tolerance=1e-10, **kwargs):
"""
Compute impacts from updated data (news and revisions)
@@ -3936,6 +3944,15 @@ def news(self, comparison, impact_date=None, impacted_variable=None,
*previous* results object or dataset or an *updated* results object
or dataset. If not specified, then an attempt is made to determine
the comparison type.
+ revisions_details_start : bool, int, str, or datetime, optional
+ The period at which to beging computing the detailed impacts of
+ data revisions. Any revisions prior to this period will have their
+ impacts grouped together. If a negative integer, interpreted as
+ an offset from the end of the dataset. If set to True, detailed
+ impacts are computed for all revisions, while if set to False, all
+ revisions are grouped together. Default is False. Note that for
+ large models, setting this to be near the beginning of the sample
+ can cause this function to be slow.
state_index : array_like, optional
An optional index specifying a subset of states to use when
constructing the impacts of revisions and news. For example, if
@@ -3984,6 +4001,11 @@ def news(self, comparison, impact_date=None, impacted_variable=None,
' large for the number of states in the model'
f' ({self.model.k_states}).')
+ if not isinstance(revisions_details_start, (int, bool)):
+ revisions_details_start, _, _, _ = (
+ self.model._get_prediction_index(
+ revisions_details_start, revisions_details_start))
+
# Get the previous and updated results objects from `self` and
# `comparison`:
previous, updated, comparison_dataset = self._get_previous_updated(
@@ -4045,9 +4067,10 @@ def news(self, comparison, impact_date=None, impacted_variable=None,
updated = updated_orig.append(extra, exog=exog, **kwargs)
# Compute the news
- news_results = (
- updated._news_previous_results(previous, start, end + 1, periods,
- state_index=state_index))
+ news_results = updated._news_previous_results(
+ previous, start, end + 1, periods,
+ revisions_details_start=revisions_details_start,
+ state_index=state_index)
if not return_raw:
news_results = NewsResults(
diff --git a/statsmodels/tsa/statespace/news.py b/statsmodels/tsa/statespace/news.py
index 95e8bffe805..3f52ec34898 100644
--- a/statsmodels/tsa/statespace/news.py
+++ b/statsmodels/tsa/statespace/news.py
@@ -5,12 +5,13 @@
Author: Chad Fulton
License: BSD-3
"""
+from statsmodels.compat.pandas import FUTURE_STACK
import numpy as np
import pandas as pd
-from statsmodels.iolib.table import SimpleTable
from statsmodels.iolib.summary import Summary
+from statsmodels.iolib.table import SimpleTable
from statsmodels.iolib.tableformatting import fmt_params
@@ -45,47 +46,76 @@ class NewsResults:
Attributes
----------
- total_impacts : pd.Series
+ total_impacts : pd.DataFrame
Updates to forecasts of impacted variables from both news and data
revisions, E[y^i | post] - E[y^i | previous].
- update_impacts : pd.Series
+ update_impacts : pd.DataFrame
Updates to forecasts of impacted variables from the news,
E[y^i | post] - E[y^i | revisions] where y^i are the impacted variables
of interest.
- revision_impacts : pd.Series
- Updates to forecasts of impacted variables from data revisions,
+ revision_impacts : pd.DataFrame
+ Updates to forecasts of impacted variables from all data revisions,
E[y^i | revisions] - E[y^i | previous].
- news : pd.Series
+ news : pd.DataFrame
The unexpected component of the updated data,
E[y^u | post] - E[y^u | revisions] where y^u are the updated variables.
- weights : pd.Series
+ weights : pd.DataFrame
Weights describing the effect of news on variables of interest.
- revisions : pd.Series
- The revisions betwen the current and previously observed data
+ revisions : pd.DataFrame
+ The revisions between the current and previously observed data, for
+ revisions for which detailed impacts were computed.
+ revisions_all : pd.DataFrame
+ The revisions between the current and previously observed data,
y^r_{revised} - y^r_{previous} where y^r are the revised variables.
- revision_weights : pd.Series
- Weights describing the effect of revisions on variables of interest.
- update_forecasts : pd.Series
+ revision_weights : pd.DataFrame
+ Weights describing the effect of revisions on variables of interest,
+ for revisions for which detailed impacts were computed.
+ revision_weights_all : pd.DataFrame
+ Weights describing the effect of revisions on variables of interest,
+ with a new entry that includes NaNs for the revisions for which
+ detailed impacts were not computed.
+ update_forecasts : pd.DataFrame
Forecasts based on the previous dataset of the variables that were
updated, E[y^u | previous].
- update_realized : pd.Series
+ update_realized : pd.DataFrame
Actual observed data associated with the variables that were
updated, y^u
- revised_prev : pd.Series
+ revisions_details_start : int
+ Integer index of first period in which detailed revision impacts were
+ computed.
+ revision_detailed_impacts : pd.DataFrame
+ Updates to forecasts of impacted variables from data revisions with
+ detailed impacts, E[y^i | revisions] - E[y^i | grouped revisions].
+ revision_grouped_impacts : pd.DataFrame
+ Updates to forecasts of impacted variables from data revisions that
+ were grouped together, E[y^i | grouped revisions] - E[y^i | previous].
+ revised_prev : pd.DataFrame
+ Previously observed data associated with the variables that were
+ revised, for revisions for which detailed impacts were computed.
+ revised_prev_all : pd.DataFrame
Previously observed data associated with the variables that were
revised, y^r_{previous}
- revised : pd.Series
+ revised : pd.DataFrame
+ Currently observed data associated with the variables that were
+ revised, for revisions for which detailed impacts were computed.
+ revised_all : pd.DataFrame
Currently observed data associated with the variables that were
revised, y^r_{revised}
- prev_impacted_forecasts : pd.Series
+ prev_impacted_forecasts : pd.DataFrame
Previous forecast of the variables of interest, E[y^i | previous].
- post_impacted_forecasts : pd.Series
+ post_impacted_forecasts : pd.DataFrame
Forecast of the variables of interest after taking into account both
revisions and updates, E[y^i | post].
revisions_iloc : pd.DataFrame
The integer locations of the data revisions in the dataset.
revisions_ix : pd.DataFrame
The label-based locations of the data revisions in the dataset.
+ revisions_iloc_detailed : pd.DataFrame
+ The integer locations of the data revisions in the dataset for which
+ detailed impacts were computed.
+ revisions_ix_detailed : pd.DataFrame
+ The label-based locations of the data revisions in the dataset for
+ which detailed impacts were computed.
updates_iloc : pd.DataFrame
The integer locations of the updated data points.
updates_ix : pd.DataFrame
@@ -126,30 +156,56 @@ def __init__(self, news_results, model, updated, previous,
self.endog_names = self.updated.model.endog_names
self.k_endog = len(self.endog_names)
+ self.n_revisions = len(self.news_results.revisions_ix)
+ self.n_revisions_detailed = len(self.news_results.revisions_details)
+ self.n_revisions_grouped = len(self.news_results.revisions_grouped)
+
index = self.updated.model._index
columns = np.atleast_1d(self.endog_names)
# E[y^i | post]
self.post_impacted_forecasts = pd.DataFrame(
news_results.post_impacted_forecasts.T,
- index=self.row_labels, columns=columns)
+ index=self.row_labels, columns=columns).rename_axis(
+ index='impact date', columns='impacted variable')
# E[y^i | previous]
self.prev_impacted_forecasts = pd.DataFrame(
news_results.prev_impacted_forecasts.T,
- index=self.row_labels, columns=columns)
+ index=self.row_labels, columns=columns).rename_axis(
+ index='impact date', columns='impacted variable')
# E[y^i | post] - E[y^i | revisions]
self.update_impacts = pd.DataFrame(
news_results.update_impacts,
- index=self.row_labels, columns=columns)
+ index=self.row_labels, columns=columns).rename_axis(
+ index='impact date', columns='impacted variable')
+ # E[y^i | revisions] - E[y^i | grouped revisions]
+ self.revision_detailed_impacts = pd.DataFrame(
+ news_results.revision_detailed_impacts,
+ index=self.row_labels,
+ columns=columns,
+ dtype=float,
+ ).rename_axis(index="impact date", columns="impacted variable")
# E[y^i | revisions] - E[y^i | previous]
self.revision_impacts = pd.DataFrame(
news_results.revision_impacts,
- index=self.row_labels, columns=columns)
+ index=self.row_labels,
+ columns=columns,
+ dtype=float,
+ ).rename_axis(index="impact date", columns="impacted variable")
+ # E[y^i | grouped revisions] - E[y^i | previous]
+ self.revision_grouped_impacts = (
+ self.revision_impacts
+ - self.revision_detailed_impacts.fillna(0))
+ if self.n_revisions_grouped == 0:
+ self.revision_grouped_impacts.loc[:] = 0
+
# E[y^i | post] - E[y^i | previous]
self.total_impacts = (self.post_impacted_forecasts -
self.prev_impacted_forecasts)
# Indices of revisions and updates
+ self.revisions_details_start = news_results.revisions_details_start
+
self.revisions_iloc = pd.DataFrame(
list(zip(*news_results.revisions_ix)),
index=['revision date', 'revised variable']).T
@@ -161,6 +217,10 @@ def __init__(self, news_results, model, updated, previous,
else:
self.revisions_ix = iloc.copy()
+ mask = iloc['revision date'] >= self.revisions_details_start
+ self.revisions_iloc_detailed = self.revisions_iloc[mask]
+ self.revisions_ix_detailed = self.revisions_ix[mask]
+
self.updates_iloc = pd.DataFrame(
list(zip(*news_results.updates_ix)),
index=['update date', 'updated variable']).T
@@ -176,9 +236,12 @@ def __init__(self, news_results, model, updated, previous,
self.state_index = news_results.state_index
# Wrap forecasts and forecasts errors
- r_ix = pd.MultiIndex.from_arrays([
+ r_ix_all = pd.MultiIndex.from_arrays([
self.revisions_ix['revision date'],
self.revisions_ix['revised variable']])
+ r_ix = pd.MultiIndex.from_arrays([
+ self.revisions_ix_detailed['revision date'],
+ self.revisions_ix_detailed['revised variable']])
u_ix = pd.MultiIndex.from_arrays([
self.updates_ix['update date'],
self.updates_ix['updated variable']])
@@ -189,13 +252,21 @@ def __init__(self, news_results, model, updated, previous,
dtype=model.params.dtype)
else:
self.news = pd.Series(news_results.news, index=u_ix, name='news')
- # E[y^u | revisions] - E[y^u | previous]
+ # Revisions to data (y^r_{revised} - y^r_{previous})
+ if news_results.revisions_all is None:
+ self.revisions_all = pd.Series([], index=r_ix_all, name='revision',
+ dtype=model.params.dtype)
+ else:
+ self.revisions_all = pd.Series(news_results.revisions_all,
+ index=r_ix_all, name='revision')
+ # Revisions to data (y^r_{revised} - y^r_{previous}) for which detailed
+ # impacts were computed
if news_results.revisions is None:
self.revisions = pd.Series([], index=r_ix, name='revision',
dtype=model.params.dtype)
else:
- self.revisions = pd.Series(news_results.revisions, index=r_ix,
- name='revision')
+ self.revisions = pd.Series(news_results.revisions,
+ index=r_ix, name='revision')
# E[y^u | revised]
if news_results.update_forecasts is None:
self.update_forecasts = pd.Series([], index=u_ix,
@@ -204,6 +275,14 @@ def __init__(self, news_results, model, updated, previous,
self.update_forecasts = pd.Series(
news_results.update_forecasts, index=u_ix)
# y^r_{revised}
+ if news_results.revised_all is None:
+ self.revised_all = pd.Series([], index=r_ix_all,
+ dtype=model.params.dtype,
+ name='revised')
+ else:
+ self.revised_all = pd.Series(news_results.revised_all,
+ index=r_ix_all, name='revised')
+ # y^r_{revised} for which detailed impacts were computed
if news_results.revised is None:
self.revised = pd.Series([], index=r_ix, dtype=model.params.dtype,
name='revised')
@@ -211,6 +290,13 @@ def __init__(self, news_results, model, updated, previous,
self.revised = pd.Series(news_results.revised, index=r_ix,
name='revised')
# y^r_{previous}
+ if news_results.revised_prev_all is None:
+ self.revised_prev_all = pd.Series([], index=r_ix_all,
+ dtype=model.params.dtype)
+ else:
+ self.revised_prev_all = pd.Series(
+ news_results.revised_prev_all, index=r_ix_all)
+ # y^r_{previous} for which detailed impacts were computed
if news_results.revised_prev is None:
self.revised_prev = pd.Series([], index=r_ix,
dtype=model.params.dtype)
@@ -235,7 +321,7 @@ def __init__(self, news_results, model, updated, previous,
self.weights.columns.names = ['impact date', 'impacted variable']
# reshaped version of revision_weights
- if len(self.revisions_iloc):
+ if self.n_revisions_detailed > 0:
revision_weights = news_results.revision_weights.reshape(
len(cols), len(r_ix))
else:
@@ -245,6 +331,9 @@ def __init__(self, news_results, model, updated, previous,
self.revision_weights.columns.names = [
'impact date', 'impacted variable']
+ self.revision_weights_all = self.revision_weights.reindex(
+ self.revised_all.index)
+
@property
def impacted_variable(self):
return self._impacted_variable
@@ -276,6 +365,8 @@ def data_revisions(self):
in the previous dataset.
- `revised`: the revised value of the data, as it is observed
in the new dataset
+ - `detailed impacts computed`: whether or not detailed impacts have
+ been computed in these NewsResults for this revision
See also
--------
@@ -283,9 +374,11 @@ def data_revisions(self):
"""
# Save revisions data
data = pd.concat([
- self.revised.rename('revised'),
- self.revised_prev.rename('observed (prev)')
+ self.revised_all.rename('revised'),
+ self.revised_prev_all.rename('observed (prev)')
], axis=1).sort_index()
+ data['detailed impacts computed'] = (
+ self.revised_all.index.isin(self.revised.index))
return data
@property
@@ -375,7 +468,8 @@ def details_by_impact(self):
revision_details_by_update
impacts
"""
- df = self.weights.stack(level=[0, 1]).rename('weight').to_frame()
+ s = self.weights.stack(level=[0, 1], **FUTURE_STACK)
+ df = s.rename('weight').to_frame()
if len(self.updates_iloc):
df['forecast (prev)'] = self.update_forecasts
df['observed'] = self.update_realized
@@ -392,9 +486,23 @@ def details_by_impact(self):
if self.impacted_variable is not None and len(df) > 0:
df = df.loc[np.s_[:, self.impacted_variable], :]
- mask = np.abs(df['weight']) > self.tolerance
+ mask = np.abs(df['impact']) > self.tolerance
return df[mask]
+ @property
+ def _revision_grouped_impacts(self):
+ s = self.revision_grouped_impacts.stack(**FUTURE_STACK)
+ df = s.rename('impact').to_frame()
+ df = df.reindex(['revision date', 'revised variable', 'impact'],
+ axis=1)
+ if self.revisions_details_start > 0:
+ df['revision date'] = (
+ self.updated.model._index[self.revisions_details_start - 1])
+ df['revised variable'] = 'all prior revisions'
+ df = (df.set_index(['revision date', 'revised variable'], append=True)
+ .reorder_levels([2, 3, 0, 1]))
+ return df
+
@property
def revision_details_by_impact(self):
"""
@@ -434,6 +542,11 @@ def revision_details_by_impact(self):
new datapoints. That information can be found in the
`impacts` or `details_by_impact` tables.
+ Grouped impacts are shown in this table, with a "revision date" equal
+ to the last period prior to which detailed revisions were computed and
+ with "revised variable" set to the string "all prior revisions". For
+ these rows, all columns except "impact" will be set to NaNs.
+
This form of the details table is organized so that the impacted
dates / variables are first in the index. This is convenient for
slicing by impacted variables / dates to view the details of data
@@ -453,21 +566,28 @@ def revision_details_by_impact(self):
details_by_impact
impacts
"""
- weights = self.revision_weights.stack(level=[0, 1])
+ weights = self.revision_weights.stack(level=[0, 1], **FUTURE_STACK)
df = pd.concat([
self.revised.reindex(weights.index),
self.revised_prev.rename('observed (prev)').reindex(weights.index),
self.revisions.reindex(weights.index),
weights.rename('weight'),
- (self.revisions * weights).rename('impact'),
+ (self.revisions.reindex(weights.index) * weights).rename('impact'),
], axis=1)
+ if self.n_revisions_grouped > 0:
+ df = pd.concat([df, self._revision_grouped_impacts])
+ # Explicitly set names for compatibility with pandas=1.2.5
+ df.index = df.index.set_names(
+ ['revision date', 'revised variable',
+ 'impact date', 'impacted variable'])
+
df = df.reorder_levels([2, 3, 0, 1]).sort_index()
if self.impacted_variable is not None and len(df) > 0:
df = df.loc[np.s_[:, self.impacted_variable], :]
- mask = np.abs(df['weight']) > self.tolerance
+ mask = np.abs(df['impact']) > self.tolerance
return df[mask]
@property
@@ -527,7 +647,8 @@ def details_by_update(self):
details_by_impact
impacts
"""
- df = self.weights.stack(level=[0, 1]).rename('weight').to_frame()
+ s = self.weights.stack(level=[0, 1], **FUTURE_STACK)
+ df = s.rename('weight').to_frame()
if len(self.updates_iloc):
df['forecast (prev)'] = self.update_forecasts
df['observed'] = self.update_realized
@@ -550,7 +671,7 @@ def details_by_update(self):
details = details.loc[
np.s_[:, :, :, :, :, self.impacted_variable], :]
- mask = np.abs(details['weight']) > self.tolerance
+ mask = np.abs(details['impact']) > self.tolerance
return details[mask]
@property
@@ -589,7 +710,12 @@ def revision_details_by_update(self):
release.
This table does not summarize the impacts or show the effect of
- revisions. That information can be found in the `impacts` table.
+ new datapoints, see `details_by_update` instead.
+
+ Grouped impacts are shown in this table, with a "revision date" equal
+ to the last period prior to which detailed revisions were computed and
+ with "revised variable" set to the string "all prior revisions". For
+ these rows, all columns except "impact" will be set to NaNs.
This form of the details table is organized so that the revision
dates / variables are first in the index, and in this table the index
@@ -609,16 +735,23 @@ def revision_details_by_update(self):
details_by_impact
impacts
"""
- weights = self.revision_weights.stack(level=[0, 1])
+ weights = self.revision_weights.stack(level=[0, 1], **FUTURE_STACK)
df = pd.concat([
self.revised_prev.rename('observed (prev)').reindex(weights.index),
self.revised.reindex(weights.index),
self.revisions.reindex(weights.index),
weights.rename('weight'),
- (self.revisions * weights).rename('impact'),
+ (self.revisions.reindex(weights.index) * weights).rename('impact'),
], axis=1)
+ if self.n_revisions_grouped > 0:
+ df = pd.concat([df, self._revision_grouped_impacts])
+ # Explicitly set names for compatibility with pandas=1.2.5
+ df.index = df.index.set_names(
+ ['revision date', 'revised variable',
+ 'impact date', 'impacted variable'])
+
details = (df.set_index(['observed (prev)', 'revised'], append=True)
.reorder_levels([
'revision date', 'revised variable', 'revised',
@@ -630,7 +763,7 @@ def revision_details_by_update(self):
details = details.loc[
np.s_[:, :, :, :, :, self.impacted_variable], :]
- mask = np.abs(details['weight']) > self.tolerance
+ mask = np.abs(details['impact']) > self.tolerance
return details[mask]
@property
@@ -682,9 +815,9 @@ def impacts(self):
self.post_impacted_forecasts.unstack().rename('estimate (new)')],
axis=1)
impacts['impact of revisions'] = (
- impacts['impact of revisions'].fillna(0))
+ impacts['impact of revisions'].astype(float).fillna(0))
impacts['impact of news'] = (
- impacts['impact of news'].fillna(0))
+ impacts['impact of news'].astype(float).fillna(0))
impacts['total impact'] = (impacts['impact of revisions'] +
impacts['impact of news'])
impacts = impacts.reorder_levels([1, 0]).sort_index()
@@ -756,7 +889,7 @@ def summary_impacts(self, impact_date=None, impacted_variable=None,
# Default is to only show the revisions columns if there were any
# revisions (otherwise it would just be a column of zeros)
if show_revisions_columns is None:
- show_revisions_columns = len(self.revisions_iloc) > 0
+ show_revisions_columns = self.n_revisions > 0
# Select only the variables / dates of interest
s = list(np.s_[:, :])
@@ -959,7 +1092,7 @@ def summary_details(self, source='news', impact_date=None,
}
else:
raise ValueError(f'Invalid `source`: {source}. Must be "news" or'
- ' "impacts".')
+ ' "revisions".')
# Make the first index level the groupby level
groupby = groupby.lower().replace('_', ' ')
@@ -1081,10 +1214,11 @@ def create_table(details, removed_levels):
if multiple_tables:
details_table = []
- for item in details[groupby].unique():
- mask = details[groupby] == item
- item_details = details[mask].drop(groupby, axis=1)
- item_removed_levels = [f'{groupby} = {item}'] + removed_levels
+ for item in details[columns[groupby]].unique():
+ mask = details[columns[groupby]] == item
+ item_details = details[mask].drop(columns[groupby], axis=1)
+ item_removed_levels = (
+ [f'{columns[groupby]} = {item}'] + removed_levels)
details_table.append(create_table(item_details,
item_removed_levels))
else:
@@ -1112,13 +1246,17 @@ def summary_revisions(self, sparsify=True):
- `observed (prev)` : the observed value prior to the revision
- `revised` : the new value after the revision
- `revision` : the new value after the revision
+ - `detailed impacts computed` : whether detailed impacts were
+ computed for this revision
"""
data = pd.merge(
- self.data_revisions, self.revisions, left_index=True,
+ self.data_revisions, self.revisions_all, left_index=True,
right_index=True).sort_index().reset_index()
+ data = data[['revision date', 'revised variable', 'observed (prev)',
+ 'revision', 'detailed impacts computed']]
data[['revision date', 'revised variable']] = (
data[['revision date', 'revised variable']].applymap(str))
- data.iloc[:, 2:] = data.iloc[:, 2:].applymap(
+ data.iloc[:, 2:-1] = data.iloc[:, 2:-1].applymap(
lambda num: '' if pd.isnull(num) else '%.2f' % num)
# Sparsify the date column
@@ -1370,7 +1508,7 @@ def get_sample(model):
table_ix += 1
# Revisions
- if include_revisions_tables and len(self.revisions_iloc) > 0:
+ if include_revisions_tables and self.n_revisions > 0:
summary.tables.insert(
table_ix, self.summary_revisions(sparsify=sparsify))
table_ix += 1
diff --git a/statsmodels/tsa/statespace/structural.py b/statsmodels/tsa/statespace/structural.py
index 6896c81d500..76b6597c9a9 100644
--- a/statsmodels/tsa/statespace/structural.py
+++ b/statsmodels/tsa/statespace/structural.py
@@ -597,7 +597,7 @@ def __init__(self, endog, level=False, trend=False, seasonal=None,
# of the data.
if cycle_period_bounds is None:
freq = self.data.freq[0] if self.data.freq is not None else ''
- if freq == 'A':
+ if freq in ('A', 'Y'):
cycle_period_bounds = (1.5, 12)
elif freq == 'Q':
cycle_period_bounds = (1.5*4, 12*4)
diff --git a/statsmodels/tsa/statespace/tests/results/results_structural.py b/statsmodels/tsa/statespace/tests/results/results_structural.py
index 61e35801a6c..53c8e2c306b 100644
--- a/statsmodels/tsa/statespace/tests/results/results_structural.py
+++ b/statsmodels/tsa/statespace/tests/results/results_structural.py
@@ -283,7 +283,7 @@
# Annual frequency dataset
{'level': 'lltrend', 'autoregressive': 1, 'cycle': True,
'stochastic_cycle': True, 'seasonal': 4, 'autoregressive': 1,
- 'exog': True, 'mle_regression': False, 'freq': 'AS'},
+ 'exog': True, 'mle_regression': False, 'freq': 'YS'},
# Quarterly frequency dataset
{'level': 'lltrend', 'autoregressive': 1, 'cycle': True,
'stochastic_cycle': True, 'seasonal': 4, 'autoregressive': 1,
@@ -295,7 +295,7 @@
# Minutely frequency dataset
{'level': 'lltrend', 'autoregressive': 1, 'cycle': True,
'stochastic_cycle': True, 'seasonal': 4, 'autoregressive': 1,
- 'exog': True, 'mle_regression': False, 'freq': 'T',
+ 'exog': True, 'mle_regression': False, 'freq': 'min',
'cycle_period_bounds': (1.5*12, 12*12)},
],
'params': [0.0001, 0.01, 0.06, 0.0001, 0.0001, 0.1, 2*pi / 10, 0.2],
diff --git a/statsmodels/tsa/statespace/tests/test_chandrasekhar.py b/statsmodels/tsa/statespace/tests/test_chandrasekhar.py
index 31d16c12591..48aecae2764 100644
--- a/statsmodels/tsa/statespace/tests/test_chandrasekhar.py
+++ b/statsmodels/tsa/statespace/tests/test_chandrasekhar.py
@@ -20,8 +20,12 @@ def check_output(res_chand, res_orig, memory_conserve=False):
# Test loglike
params = res_orig.params
assert_allclose(res_chand.llf, res_orig.llf)
- assert_allclose(res_chand.model.score_obs(params),
- res_orig.model.score_obs(params), atol=1e-10)
+ assert_allclose(
+ res_chand.model.score_obs(params),
+ res_orig.model.score_obs(params),
+ rtol=5e-5,
+ atol=5e-6
+ )
# Test state space representation matrices
for name in res_chand.model.ssm.shapes:
diff --git a/statsmodels/tsa/statespace/tests/test_dynamic_factor_mq.py b/statsmodels/tsa/statespace/tests/test_dynamic_factor_mq.py
index 4a65944abe2..683ec310215 100644
--- a/statsmodels/tsa/statespace/tests/test_dynamic_factor_mq.py
+++ b/statsmodels/tsa/statespace/tests/test_dynamic_factor_mq.py
@@ -4,7 +4,12 @@
These test standard that the state space model is set up as desired, that
expected exceptions are raised, etc.
"""
-from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal
+from statsmodels.compat.pandas import (
+ MONTH_END,
+ QUARTER_END,
+ assert_frame_equal,
+ assert_series_equal,
+)
import numpy as np
from numpy.testing import assert_, assert_allclose, assert_equal
@@ -20,7 +25,6 @@
)
from statsmodels.tsa.statespace.tests import test_dynamic_factor_mq_monte_carlo
-
SKIP_MONTE_CARLO_TESTS = True
@@ -730,7 +734,7 @@ def test_invalid_model_specification():
dta_period_M = pd.DataFrame(
dta, index=pd.period_range(start='2000', periods=10, freq='M'))
dta_date_M = pd.DataFrame(
- dta, index=pd.date_range(start='2000', periods=10, freq='M'))
+ dta, index=pd.date_range(start='2000', periods=10, freq=MONTH_END))
dta_period_Q = pd.DataFrame(
dta, index=pd.period_range(start='2000', periods=10, freq='Q'))
@@ -838,9 +842,18 @@ def test_invalid_model_specification():
dta_date_M, endog_quarterly=dta_date_W)
-@pytest.mark.parametrize('freq_Q', ['Q', 'Q-DEC', 'Q-JAN', 'QS',
- 'QS-DEC', 'QS-APR'])
-@pytest.mark.parametrize('freq_M', ['M', 'MS'])
+@pytest.mark.parametrize(
+ "freq_Q",
+ [
+ QUARTER_END,
+ f"{QUARTER_END}-DEC",
+ f"{QUARTER_END}-JAN",
+ "QS",
+ "QS-DEC",
+ "QS-APR",
+ ],
+)
+@pytest.mark.parametrize("freq_M", [MONTH_END, "MS"])
def test_date_indexes(reset_randomstate, freq_M, freq_Q):
# Test that using either PeriodIndex or DatetimeIndex for monthly or
# quarterly data, with a variety of DatetimeIndex frequencies, works
diff --git a/statsmodels/tsa/statespace/tests/test_impulse_responses.py b/statsmodels/tsa/statespace/tests/test_impulse_responses.py
index be901ac03c7..0a2987e36ba 100644
--- a/statsmodels/tsa/statespace/tests/test_impulse_responses.py
+++ b/statsmodels/tsa/statespace/tests/test_impulse_responses.py
@@ -4,17 +4,24 @@
Author: Chad Fulton
License: Simplified-BSD
"""
+from statsmodels.compat.pandas import MONTH_END
+
import warnings
import numpy as np
+from numpy.testing import assert_, assert_allclose
import pandas as pd
-from scipy.stats import ortho_group
import pytest
-from numpy.testing import assert_, assert_allclose
+from scipy.stats import ortho_group
from statsmodels.tools.sm_exceptions import EstimationWarning
-from statsmodels.tsa.statespace import (mlemodel, sarimax, structural, varmax,
- dynamic_factor)
+from statsmodels.tsa.statespace import (
+ dynamic_factor,
+ mlemodel,
+ sarimax,
+ structural,
+ varmax,
+)
from statsmodels.tsa.vector_ar.tests.test_var import get_macrodata
@@ -660,7 +667,7 @@ def test_pandas_univariate_rangeindex():
def test_pandas_univariate_dateindex():
# Impulse responses still have RangeIndex (i.e. aren't wrapped with dates)
- ix = pd.date_range(start='2000', periods=1, freq='M')
+ ix = pd.date_range(start='2000', periods=1, freq=MONTH_END)
endog = pd.Series(np.zeros(1), index=ix)
mod = sarimax.SARIMAX(endog)
res = mod.filter([0.5, 1.])
@@ -685,7 +692,7 @@ def test_pandas_multivariate_rangeindex():
def test_pandas_multivariate_dateindex():
# Impulse responses still have RangeIndex (i.e. aren't wrapped with dates)
- ix = pd.date_range(start='2000', periods=1, freq='M')
+ ix = pd.date_range(start='2000', periods=1, freq=MONTH_END)
endog = pd.DataFrame(np.zeros((1, 2)), index=ix)
mod = varmax.VARMAX(endog, trend='n')
res = mod.filter([0.5, 0., 0., 0.2, 1., 0., 1.])
@@ -698,7 +705,7 @@ def test_pandas_multivariate_dateindex():
def test_pandas_anchor():
# Test that anchor with dates works
- ix = pd.date_range(start='2000', periods=10, freq='M')
+ ix = pd.date_range(start='2000', periods=10, freq=MONTH_END)
endog = pd.DataFrame(np.zeros((10, 2)), index=ix)
mod = TVSS(endog)
res = mod.filter([])
diff --git a/statsmodels/tsa/statespace/tests/test_mlemodel.py b/statsmodels/tsa/statespace/tests/test_mlemodel.py
index af6c70f9466..7842b7cfcd3 100644
--- a/statsmodels/tsa/statespace/tests/test_mlemodel.py
+++ b/statsmodels/tsa/statespace/tests/test_mlemodel.py
@@ -4,22 +4,35 @@
Author: Chad Fulton
License: Simplified-BSD
"""
+from statsmodels.compat.pandas import MONTH_END
+
import os
import re
import warnings
import numpy as np
+from numpy.testing import (
+ assert_,
+ assert_allclose,
+ assert_almost_equal,
+ assert_equal,
+ assert_raises,
+)
import pandas as pd
import pytest
-from statsmodels.tsa.statespace import (sarimax, varmax, kalman_filter,
- kalman_smoother)
-from statsmodels.tsa.statespace.mlemodel import MLEModel, MLEResultsWrapper
from statsmodels.datasets import nile
-from numpy.testing import (
- assert_, assert_almost_equal, assert_equal, assert_allclose, assert_raises)
+from statsmodels.tsa.statespace import (
+ kalman_filter,
+ kalman_smoother,
+ sarimax,
+ varmax,
+)
+from statsmodels.tsa.statespace.mlemodel import MLEModel, MLEResultsWrapper
from statsmodels.tsa.statespace.tests.results import (
- results_sarimax, results_var_misc)
+ results_sarimax,
+ results_var_misc,
+)
current_path = os.path.dirname(os.path.abspath(__file__))
@@ -1173,13 +1186,13 @@ def test_states_index_periodindex():
def test_states_index_dateindex():
nobs = 10
- ix = pd.date_range(start='2000', periods=nobs, freq='M')
+ ix = pd.date_range(start='2000', periods=nobs, freq=MONTH_END)
endog = pd.Series(np.zeros(nobs), index=ix)
mod = sarimax.SARIMAX(endog, order=(2, 0, 0))
res = mod.smooth([0.5, 0.1, 1.0])
- predicted_ix = pd.date_range(start=ix[0], periods=nobs + 1, freq='M')
+ predicted_ix = pd.date_range(start=ix[0], periods=nobs + 1, freq=MONTH_END)
cols = pd.Index(['state.0', 'state.1'])
check_states_index(res.states, ix, predicted_ix, cols)
diff --git a/statsmodels/tsa/statespace/tests/test_news.py b/statsmodels/tsa/statespace/tests/test_news.py
index cfe47eb90c8..e03511752ce 100644
--- a/statsmodels/tsa/statespace/tests/test_news.py
+++ b/statsmodels/tsa/statespace/tests/test_news.py
@@ -5,6 +5,7 @@
License: BSD-3
"""
from statsmodels.compat.pandas import (
+ FUTURE_STACK,
assert_frame_equal,
assert_series_equal,
)
@@ -157,8 +158,8 @@ def check_news(news, revisions, updates, impact_dates, impacted_variables,
assert_(np.all(news.revision_impacts.isnull()))
# Total impacts
- total_impacts = (news.revision_impacts.fillna(0) +
- news.update_impacts.fillna(0))
+ total_impacts = (news.revision_impacts.astype(float).fillna(0) +
+ news.update_impacts.astype(float).fillna(0))
assert_allclose(news.total_impacts, total_impacts, atol=1e-12)
# - Impacted variable forecasts ------------------------------------------
@@ -178,7 +179,7 @@ def check_news(news, revisions, updates, impact_dates, impacted_variables,
# - Table: data revisions ------------------------------------------------
assert_equal(news.data_revisions.columns.tolist(),
- ['revised', 'observed (prev)'])
+ ['revised', 'observed (prev)', 'detailed impacts computed'])
assert_equal(news.data_revisions.index.names,
['revision date', 'revised variable'])
assert_(news.data_revisions.index.equals(revisions_index))
@@ -255,23 +256,37 @@ def check_news(news, revisions, updates, impact_dates, impacted_variables,
desired = ['impact date', 'impacted variable']
assert_equal(impacts.index.names, desired)
- assert_allclose(impacts.loc[:, 'estimate (prev)'],
- news.prev_impacted_forecasts.stack(), atol=1e-12)
- assert_allclose(impacts.loc[:, 'impact of revisions'],
- news.revision_impacts.fillna(0).stack(), atol=1e-12)
- assert_allclose(impacts.loc[:, 'impact of news'],
- news.update_impacts.fillna(0).stack(), atol=1e-12)
- assert_allclose(impacts.loc[:, 'total impact'],
- news.total_impacts.stack(), atol=1e-12)
- assert_allclose(impacts.loc[:, 'estimate (new)'],
- news.post_impacted_forecasts.stack(), atol=1e-12)
-
-
-# @pytest.mark.parametrize('revisions', [True, False])
-# @pytest.mark.parametrize('updates', [True, False])
-@pytest.mark.parametrize('revisions', [True])
-@pytest.mark.parametrize('updates', [True])
-def test_sarimax_time_invariant(revisions, updates):
+ assert_allclose(
+ impacts.loc[:, "estimate (prev)"],
+ news.prev_impacted_forecasts.stack(**FUTURE_STACK),
+ atol=1e-12,
+ )
+ assert_allclose(
+ impacts.loc[:, "impact of revisions"],
+ news.revision_impacts.astype(float).fillna(0).stack(**FUTURE_STACK),
+ atol=1e-12,
+ )
+ assert_allclose(
+ impacts.loc[:, "impact of news"],
+ news.update_impacts.astype(float).fillna(0).stack(**FUTURE_STACK),
+ atol=1e-12,
+ )
+ assert_allclose(
+ impacts.loc[:, "total impact"],
+ news.total_impacts.stack(**FUTURE_STACK),
+ atol=1e-12,
+ )
+ assert_allclose(
+ impacts.loc[:, "estimate (new)"],
+ news.post_impacted_forecasts.stack(**FUTURE_STACK),
+ atol=1e-12,
+ )
+
+
+@pytest.mark.parametrize('revisions', [True, False])
+@pytest.mark.parametrize('updates', [True, False])
+@pytest.mark.parametrize('revisions_details_start', [True, False, -2])
+def test_sarimax_time_invariant(revisions, updates, revisions_details_start):
# Construct previous and updated datasets
endog = dta['infl'].copy()
comparison_type = None
@@ -291,7 +306,8 @@ def test_sarimax_time_invariant(revisions, updates):
mod = sarimax.SARIMAX(endog1)
res = mod.smooth([0.5, 1.0])
news = res.news(endog2, start='2009Q2', end='2010Q1',
- comparison_type=comparison_type)
+ comparison_type=comparison_type,
+ revisions_details_start=revisions_details_start)
# Compute the true values for each combination of (revsions, updates)
impact_dates = pd.period_range(start='2009Q2', end='2010Q1', freq='Q')
@@ -998,3 +1014,293 @@ def test_invalid():
msg = 'The index associated with the updated results is not a superset'
with pytest.raises(ValueError, match=msg):
res.news(endog.values)
+
+
+@pytest.mark.parametrize('revisions_details_start', [True, -10, 200])
+def test_detailed_revisions(revisions_details_start):
+ # Construct original and revised datasets
+ y = np.log(dta[['realgdp', 'realcons',
+ 'realinv', 'cpi']]).diff().iloc[1:] * 100
+ y.iloc[-1, 0] = np.nan
+
+ y_revised = y.copy()
+ revisions = {
+ ('2009Q2', 'realgdp'): 1.1,
+ ('2009Q3', 'realcons'): 0.5,
+ ('2009Q2', 'realinv'): -0.3,
+ ('2009Q2', 'cpi'): 0.2,
+ ('2009Q3', 'cpi'): 0.2,
+ }
+ for key, diff in revisions.items():
+ y_revised.loc[key] += diff
+
+ # Create model and results
+ mod = varmax.VARMAX(y, trend='n')
+ ar_coeff = {
+ 'realgdp': 0.9,
+ 'realcons': 0.8,
+ 'realinv': 0.7,
+ 'cpi': 0.6
+ }
+ params = np.r_[np.diag(list(ar_coeff.values())).flatten(),
+ [1, 0, 1, 0, 0, 1, 0, 0, 0, 1]]
+ res = mod.smooth(params)
+ res_revised = res.apply(y_revised)
+ news = res_revised.news(res, comparison_type='previous', tolerance=-1,
+ revisions_details_start=revisions_details_start)
+
+ # Tests
+ data_revisions = news.data_revisions
+ revision_details = news.revision_details_by_update.reset_index([2, 3])
+
+ for key, diff in revisions.items():
+ assert_allclose(data_revisions.loc[key, 'revised'], y_revised.loc[key])
+ assert_allclose(data_revisions.loc[key, 'observed (prev)'], y.loc[key])
+ assert_equal(
+ # Need to manually cast to numpy for compatibility with
+ # pandas==1.2.5
+ np.array(data_revisions.loc[key, 'detailed impacts computed']),
+ True)
+ assert_allclose(revision_details.loc[key, 'revised'],
+ y_revised.loc[key])
+ assert_allclose(revision_details.loc[key, 'observed (prev)'],
+ y.loc[key])
+ assert_allclose(revision_details.loc[key, 'revision'], diff)
+
+ # For revisions to the impact period, the own-weight is equal to 1.
+ key = ('2009Q3', 'realcons', '2009Q3', 'realcons')
+ assert_allclose(revision_details.loc[key, 'weight'], 1)
+ assert_allclose(revision_details.loc[key, 'impact'],
+ revisions[('2009Q3', 'realcons')])
+ key = ('2009Q3', 'cpi', '2009Q3', 'cpi')
+ assert_allclose(revision_details.loc[key, 'weight'], 1)
+ assert_allclose(revision_details.loc[key, 'impact'],
+ revisions[('2009Q3', 'cpi')])
+
+ # For revisions just before the impact period, all weights are equal to
+ # zero unless there is a missing value in the impact period, in which case
+ # the weight is equal to the AR coefficient
+ key = ('2009Q2', 'realgdp', '2009Q3', 'realgdp')
+ assert_allclose(revision_details.loc[key, 'weight'], ar_coeff['realgdp'])
+ assert_allclose(revision_details.loc[key, 'impact'],
+ ar_coeff['realgdp'] * revisions[('2009Q2', 'realgdp')])
+ key = ('2009Q2', 'realinv', '2009Q3', 'realinv')
+ assert_allclose(revision_details.loc[key, 'weight'], 0)
+ assert_allclose(revision_details.loc[key, 'impact'], 0)
+ key = ('2009Q2', 'cpi', '2009Q3', 'cpi')
+ assert_allclose(revision_details.loc[key, 'weight'], 0)
+ assert_allclose(revision_details.loc[key, 'impact'], 0)
+
+ # Check the impacts table
+
+ # Since we only have revisions, all impacts are due to revisions
+ assert_allclose(news.impacts['impact of news'], 0)
+ assert_allclose(news.impacts['total impact'],
+ news.impacts['impact of revisions'])
+
+ # Check the values for estimates
+ for name in ['cpi', 'realcons', 'realinv']:
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (new)'],
+ y_revised.loc['2009Q3', name])
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (prev)'],
+ y.loc['2009Q3', name])
+ # Have to handle real GDP separately since the 2009Q3 value is missing
+ name = 'realgdp'
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (new)'],
+ y_revised.loc['2009Q2', name] * ar_coeff[name])
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (prev)'],
+ y.loc['2009Q2', name] * ar_coeff[name])
+
+ # Check that the values of revision impacts sum up correctly
+ assert_allclose(
+ news.impacts['impact of revisions'],
+ revision_details.groupby(level=[2, 3]).sum()['impact']
+ )
+
+
+@pytest.mark.parametrize('revisions_details_start', [False, 202])
+def test_grouped_revisions(revisions_details_start):
+ # Tests for computing revision impacts when all revisions are grouped
+ # together (i.e. no detailed impacts are computed )
+ # Construct original and revised datasets
+ y = np.log(dta[['realgdp', 'realcons',
+ 'realinv', 'cpi']]).diff().iloc[1:] * 100
+ y.iloc[-1, 0] = np.nan
+
+ y_revised = y.copy()
+ revisions = {
+ ('2009Q2', 'realgdp'): 1.1,
+ ('2009Q3', 'realcons'): 0.5,
+ ('2009Q2', 'realinv'): -0.3,
+ ('2009Q2', 'cpi'): 0.2,
+ ('2009Q3', 'cpi'): 0.2,
+ }
+ for key, diff in revisions.items():
+ y_revised.loc[key] += diff
+
+ # Create model and results
+ mod = varmax.VARMAX(y, trend='n')
+ ar_coeff = {
+ 'realgdp': 0.9,
+ 'realcons': 0.8,
+ 'realinv': 0.7,
+ 'cpi': 0.6
+ }
+ params = np.r_[np.diag(list(ar_coeff.values())).flatten(),
+ [1, 0, 1, 0, 0, 1, 0, 0, 0, 1]]
+ res = mod.smooth(params)
+ res_revised = res.apply(y_revised)
+ news = res_revised.news(res, comparison_type='previous', tolerance=-1,
+ revisions_details_start=revisions_details_start)
+
+ # Tests
+ data_revisions = news.data_revisions
+ revision_details = news.revision_details_by_update.reset_index([2, 3])
+
+ for key, diff in revisions.items():
+ assert_allclose(data_revisions.loc[key, 'revised'], y_revised.loc[key])
+ assert_allclose(data_revisions.loc[key, 'observed (prev)'], y.loc[key])
+ assert_equal(
+ # Need to manually cast to numpy for compatibility with
+ # pandas==1.2.5
+ np.array(data_revisions.loc[key, 'detailed impacts computed']),
+ False)
+
+ # For grouped data, should not have any of revised, observed (prev),
+ # revision, weight
+ key = ('2009Q3', 'all prior revisions', '2009Q3')
+ cols = ['revised', 'observed (prev)', 'revision', 'weight']
+ assert np.all(revision_details.loc[key, cols].isnull())
+
+ # Expected grouped impacts are the sum of the detailed impacts from
+ # `test_detailed_revisions`
+ assert_allclose(revision_details.loc[key + ('realgdp',), 'impact'],
+ ar_coeff['realgdp'] * revisions[('2009Q2', 'realgdp')])
+ assert_allclose(revision_details.loc[key + ('realcons',), 'impact'],
+ revisions[('2009Q3', 'realcons')])
+ assert_allclose(revision_details.loc[key + ('realinv',), 'impact'], 0)
+ assert_allclose(revision_details.loc[key + ('cpi',), 'impact'],
+ revisions[('2009Q3', 'cpi')])
+
+ # Check the values for estimates
+ for name in ['cpi', 'realcons', 'realinv']:
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (new)'],
+ y_revised.loc['2009Q3', name])
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (prev)'],
+ y.loc['2009Q3', name])
+ # Have to handle real GDP separately since the 2009Q3 value is missing
+ name = 'realgdp'
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (new)'],
+ y_revised.loc['2009Q2', name] * ar_coeff[name])
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (prev)'],
+ y.loc['2009Q2', name] * ar_coeff[name])
+
+ # Check that the values of revision impacts sum up correctly
+ assert_allclose(
+ news.impacts['impact of revisions'],
+ revision_details.groupby(level=[2, 3]).sum()['impact']
+ )
+
+
+@pytest.mark.parametrize('revisions_details_start', [-1, 201])
+def test_mixed_revisions(revisions_details_start):
+ # Construct original and revised datasets
+ y = np.log(dta[['realgdp', 'realcons',
+ 'realinv', 'cpi']]).diff().iloc[1:] * 100
+ y.iloc[-1, 0] = np.nan
+
+ y_revised = y.copy()
+ revisions = {
+ ('2009Q2', 'realgdp'): 1.1,
+ ('2009Q3', 'realcons'): 0.5,
+ ('2009Q2', 'realinv'): -0.3,
+ ('2009Q2', 'cpi'): 0.2,
+ ('2009Q3', 'cpi'): 0.2,
+ }
+ for key, diff in revisions.items():
+ y_revised.loc[key] += diff
+
+ # Create model and results
+ mod = varmax.VARMAX(y, trend='n')
+ ar_coeff = {
+ 'realgdp': 0.9,
+ 'realcons': 0.8,
+ 'realinv': 0.7,
+ 'cpi': 0.6
+ }
+ params = np.r_[np.diag(list(ar_coeff.values())).flatten(),
+ [1, 0, 1, 0, 0, 1, 0, 0, 0, 1]]
+ res = mod.smooth(params)
+ res_revised = res.apply(y_revised)
+ news = res_revised.news(res, comparison_type='previous', tolerance=-1,
+ revisions_details_start=revisions_details_start)
+
+ # Tests
+ data_revisions = news.data_revisions
+ revision_details = news.revision_details_by_update.reset_index([2, 3])
+
+ for key, diff in revisions.items():
+ assert_allclose(data_revisions.loc[key, 'revised'], y_revised.loc[key])
+ assert_allclose(data_revisions.loc[key, 'observed (prev)'], y.loc[key])
+ # Revisions to 2009Q2 are grouped (i.e. no details are computed),
+ # while revisions to 2009Q3 have detailed impacts computed
+ expected_details_computed = key[0] == '2009Q3'
+ assert_equal(
+ # Need to manually cast to numpy for compatibility with
+ # pandas==1.2.5
+ np.array(data_revisions.loc[key, 'detailed impacts computed']),
+ expected_details_computed)
+
+ # For grouped data, should not have any of revised, observed (prev),
+ # revision, weight
+ key = ('2009Q2', 'all prior revisions', '2009Q3')
+ cols = ['revised', 'observed (prev)', 'revision', 'weight']
+ assert np.all(revision_details.loc[key, cols].isnull())
+
+ # Expected grouped impacts are the sum of the detailed impacts from
+ # `test_detailed_revisions` for revisions to 2009Q2
+ assert_allclose(revision_details.loc[key + ('realgdp',), 'impact'],
+ ar_coeff['realgdp'] * revisions[('2009Q2', 'realgdp')])
+ assert_allclose(revision_details.loc[key + ('realinv',), 'impact'], 0)
+
+ # Expected detailed impacts are for revisions to 2009Q3
+ # (for revisions to the impact period, the own-weight is equal to 1)
+ key = ('2009Q3', 'realcons', '2009Q3', 'realcons')
+ assert_allclose(revision_details.loc[key, 'weight'], 1)
+ assert_allclose(revision_details.loc[key, 'impact'],
+ revisions[('2009Q3', 'realcons')])
+ key = ('2009Q3', 'cpi', '2009Q3', 'cpi')
+ assert_allclose(revision_details.loc[key, 'weight'], 1)
+ assert_allclose(revision_details.loc[key, 'impact'],
+ revisions[('2009Q3', 'cpi')])
+
+ # Check the values for estimates
+ for name in ['cpi', 'realcons', 'realinv']:
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (new)'],
+ y_revised.loc['2009Q3', name])
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (prev)'],
+ y.loc['2009Q3', name])
+ # Have to handle real GDP separately since the 2009Q3 value is missing
+ name = 'realgdp'
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (new)'],
+ y_revised.loc['2009Q2', name] * ar_coeff[name])
+ assert_allclose(
+ news.impacts.loc[('2009Q3', name), 'estimate (prev)'],
+ y.loc['2009Q2', name] * ar_coeff[name])
+
+ # Check that the values of revision impacts sum up correctly
+ assert_allclose(
+ news.impacts['impact of revisions'],
+ revision_details.groupby(level=[2, 3]).sum()['impact']
+ )
diff --git a/statsmodels/tsa/statespace/tests/test_simulate.py b/statsmodels/tsa/statespace/tests/test_simulate.py
index ec6b3d147d1..4b6486193b0 100644
--- a/statsmodels/tsa/statespace/tests/test_simulate.py
+++ b/statsmodels/tsa/statespace/tests/test_simulate.py
@@ -5,17 +5,26 @@
License: Simplified-BSD
"""
+from statsmodels.compat.pandas import MONTH_END
+
import numpy as np
-import pandas as pd
from numpy.testing import assert_, assert_allclose, assert_equal
+import pandas as pd
import pytest
from scipy.signal import lfilter
+from statsmodels.tools.sm_exceptions import (
+ EstimationWarning,
+ SpecificationWarning,
+)
+from statsmodels.tsa.statespace import (
+ dynamic_factor,
+ sarimax,
+ structural,
+ varmax,
+)
+
from .test_impulse_responses import TVSS
-from statsmodels.tools.sm_exceptions import SpecificationWarning, \
- EstimationWarning
-from statsmodels.tsa.statespace import (sarimax, structural, varmax,
- dynamic_factor)
def test_arma_lfilter():
@@ -1578,7 +1587,7 @@ def test_pandas_univariate_rangeindex_repetitions():
def test_pandas_univariate_dateindex():
# Simulation will maintain have date index
- ix = pd.date_range(start='2000', periods=2, freq='M')
+ ix = pd.date_range(start='2000', periods=2, freq=MONTH_END)
endog = pd.Series(np.zeros(2), index=ix)
mod = sarimax.SARIMAX(endog)
res = mod.filter([0.5, 1.])
@@ -1586,7 +1595,7 @@ def test_pandas_univariate_dateindex():
# Default simulate anchors to the start of the sample
actual = res.simulate(2, state_shocks=np.zeros(2),
initial_state=np.zeros(1))
- ix = pd.date_range(start='2000-01', periods=2, freq='M')
+ ix = pd.date_range(start='2000-01', periods=2, freq=MONTH_END)
desired = pd.Series([0, 0], index=ix)
assert_allclose(actual, desired)
assert_(actual.index.equals(desired.index))
@@ -1594,14 +1603,14 @@ def test_pandas_univariate_dateindex():
# Alternative anchor changes the index
actual = res.simulate(2, anchor=2, state_shocks=np.zeros(2),
initial_state=np.zeros(1))
- ix = pd.date_range(start='2000-03', periods=2, freq='M')
+ ix = pd.date_range(start='2000-03', periods=2, freq=MONTH_END)
desired = pd.Series([0, 0], index=ix)
assert_allclose(actual, desired)
def test_pandas_univariate_dateindex_repetitions():
# Simulation will maintain have date index
- ix = pd.date_range(start='2000', periods=2, freq='M')
+ ix = pd.date_range(start='2000', periods=2, freq=MONTH_END)
endog = pd.Series(np.zeros(2), index=ix)
mod = sarimax.SARIMAX(endog)
res = mod.filter([0.5, 1.])
@@ -1609,7 +1618,7 @@ def test_pandas_univariate_dateindex_repetitions():
# Default simulate anchors to the start of the sample
actual = res.simulate(2, state_shocks=np.zeros(2),
initial_state=np.zeros(1), repetitions=2)
- ix = pd.date_range(start='2000-01', periods=2, freq='M')
+ ix = pd.date_range(start='2000-01', periods=2, freq=MONTH_END)
columns = pd.MultiIndex.from_product([['y'], [0, 1]])
desired = pd.DataFrame(np.zeros((2, 2)), index=ix, columns=columns)
assert_allclose(actual, desired)
@@ -1618,7 +1627,7 @@ def test_pandas_univariate_dateindex_repetitions():
# Alternative anchor changes the index
actual = res.simulate(2, anchor=2, state_shocks=np.zeros(2),
initial_state=np.zeros(1), repetitions=2)
- ix = pd.date_range(start='2000-03', periods=2, freq='M')
+ ix = pd.date_range(start='2000-03', periods=2, freq=MONTH_END)
columns = pd.MultiIndex.from_product([['y'], [0, 1]])
desired = pd.DataFrame(np.zeros((2, 2)), index=ix, columns=columns)
assert_allclose(actual, desired)
@@ -1674,7 +1683,7 @@ def test_pandas_multivariate_rangeindex_repetitions():
def test_pandas_multivariate_dateindex():
# Simulate will also have RangeIndex
- ix = pd.date_range(start='2000', periods=2, freq='M')
+ ix = pd.date_range(start='2000', periods=2, freq=MONTH_END)
endog = pd.DataFrame(np.zeros((2, 2)), index=ix)
mod = varmax.VARMAX(endog, trend='n')
res = mod.filter([0.5, 0., 0., 0.2, 1., 0., 1.])
@@ -1688,7 +1697,7 @@ def test_pandas_multivariate_dateindex():
# Alternative anchor changes the index
actual = res.simulate(2, anchor=2, state_shocks=np.zeros((2, 2)),
initial_state=np.zeros(2))
- ix = pd.date_range(start='2000-03', periods=2, freq='M')
+ ix = pd.date_range(start='2000-03', periods=2, freq=MONTH_END)
desired = pd.DataFrame(np.zeros((2, 2)), index=ix)
assert_allclose(actual, desired)
assert_(actual.index.equals(desired.index))
@@ -1696,7 +1705,7 @@ def test_pandas_multivariate_dateindex():
def test_pandas_multivariate_dateindex_repetitions():
# Simulate will also have RangeIndex
- ix = pd.date_range(start='2000', periods=2, freq='M')
+ ix = pd.date_range(start='2000', periods=2, freq=MONTH_END)
endog = pd.DataFrame(np.zeros((2, 2)), columns=['y1', 'y2'], index=ix)
mod = varmax.VARMAX(endog, trend='n')
res = mod.filter([0.5, 0., 0., 0.2, 1., 0., 1.])
@@ -1712,7 +1721,7 @@ def test_pandas_multivariate_dateindex_repetitions():
# Alternative anchor changes the index
actual = res.simulate(2, anchor=2, state_shocks=np.zeros((2, 2)),
initial_state=np.zeros(2), repetitions=2)
- ix = pd.date_range(start='2000-03', periods=2, freq='M')
+ ix = pd.date_range(start='2000-03', periods=2, freq=MONTH_END)
columns = pd.MultiIndex.from_product([['y1', 'y2'], [0, 1]])
desired = pd.DataFrame(np.zeros((2, 4)), index=ix, columns=columns)
assert_allclose(actual, desired)
@@ -1722,7 +1731,7 @@ def test_pandas_multivariate_dateindex_repetitions():
def test_pandas_anchor():
# Test that anchor with dates works
- ix = pd.date_range(start='2000', periods=2, freq='M')
+ ix = pd.date_range(start='2000', periods=2, freq=MONTH_END)
endog = pd.Series(np.zeros(2), index=ix)
mod = sarimax.SARIMAX(endog)
res = mod.filter([0.5, 1.])
diff --git a/statsmodels/tsa/statespace/tests/test_structural.py b/statsmodels/tsa/statespace/tests/test_structural.py
index 76b8a101cda..e27102bad03 100644
--- a/statsmodels/tsa/statespace/tests/test_structural.py
+++ b/statsmodels/tsa/statespace/tests/test_structural.py
@@ -66,11 +66,11 @@ def run_ucm(name, use_exact_diffuse=False):
freqstr = freq[0] if freq is not None else values.index.freqstr[0]
if 'cycle_period_bounds' in kwargs:
cycle_period_bounds = kwargs['cycle_period_bounds']
- elif freqstr == 'A':
+ elif freqstr in ('A', 'AS', 'Y', 'YS'):
cycle_period_bounds = (1.5, 12)
- elif freqstr == 'Q':
+ elif freqstr in ('Q', 'QS'):
cycle_period_bounds = (1.5*4, 12*4)
- elif freqstr == 'M':
+ elif freqstr in ('M', 'MS'):
cycle_period_bounds = (1.5*12, 12*12)
else:
# If we have no information on data frequency, require the
diff --git a/statsmodels/tsa/statespace/tests/test_weights.py b/statsmodels/tsa/statespace/tests/test_weights.py
index d08bcb610f0..85cfce84e53 100644
--- a/statsmodels/tsa/statespace/tests/test_weights.py
+++ b/statsmodels/tsa/statespace/tests/test_weights.py
@@ -506,8 +506,10 @@ def test_compute_t_compute_j(compute_j, compute_t, reset_randomstate):
actual, _, _ = tools.compute_smoothed_state_weights(
res, compute_t=compute_t, compute_j=compute_j)
- compute_t = np.atleast_1d(compute_t)
- compute_j = np.atleast_1d(compute_j)
+ compute_t = np.unique(np.atleast_1d(compute_t))
+ compute_t.sort()
+ compute_j = np.unique(np.atleast_1d(compute_j))
+ compute_j.sort()
for t in np.arange(10):
if t not in compute_t:
desired[t, :] = np.nan
@@ -515,6 +517,10 @@ def test_compute_t_compute_j(compute_j, compute_t, reset_randomstate):
if j not in compute_j:
desired[:, j] = np.nan
+ # Subset to the actual required compute_t and compute_j
+ ix = np.ix_(compute_t, compute_j)
+ desired = desired[ix]
+
assert_allclose(actual, desired, atol=1e-7)
diff --git a/statsmodels/tsa/statespace/tools.py b/statsmodels/tsa/statespace/tools.py
index a45fd896410..43c708fe48b 100644
--- a/statsmodels/tsa/statespace/tools.py
+++ b/statsmodels/tsa/statespace/tools.py
@@ -1940,13 +1940,15 @@ def _compute_smoothed_state_weights(ssm, compute_t=None, compute_j=None,
# Re-order missing entries correctly and transpose to the appropriate
# shape
- if np.any(_model.nmissing):
+ t0 = min(compute_t[0], compute_j[0])
+ missing = np.isnan(ssm.endog[:, t0:])
+ if np.any(missing):
shape = weights.shape
# Transpose m, p, t, j, -> t, m, p, j so that we can use the
# `reorder_missing_matrix` function
weights = np.asfortranarray(weights.transpose(2, 0, 1, 3).reshape(
shape[2] * shape[0], shape[1], shape[3], order='C'))
- missing = np.asfortranarray(np.isnan(ssm.endog).astype(np.int32))
+ missing = np.asfortranarray(missing.astype(np.int32))
reorder_missing_matrix(weights, missing, reorder_cols=True,
inplace=True)
# Transpose t, m, p, j -> t, j, m, p,
@@ -1962,6 +1964,13 @@ def _compute_smoothed_state_weights(ssm, compute_t=None, compute_j=None,
# Transpose m, l, t -> t, m, l
prior_weights = prior_weights.transpose(2, 0, 1)
+ # Subset to the actual computed t, j elements
+ ix_tj = np.ix_(compute_t - t0, compute_j - t0)
+ weights = weights[ix_tj]
+ state_intercept_weights = state_intercept_weights[ix_tj]
+ if compute_prior_weights:
+ prior_weights = prior_weights[compute_t - t0]
+
return weights, state_intercept_weights, prior_weights
diff --git a/statsmodels/tsa/statespace/varmax.py b/statsmodels/tsa/statespace/varmax.py
index 2aaa7d970a5..303044d264c 100644
--- a/statsmodels/tsa/statespace/varmax.py
+++ b/statsmodels/tsa/statespace/varmax.py
@@ -1062,7 +1062,15 @@ def simulate(self, nsimulations, measurement_shocks=None,
return out
def _news_previous_results(self, previous, start, end, periods,
+ revisions_details_start=False,
state_index=None):
+ # TODO: tests for:
+ # - the model cloning used in `kalman_smoother.news` works when we
+ # have time-varying exog (i.e. or do we need to somehow explicitly
+ # call the _set_final_exog and _set_final_predicted_state methods
+ # on the rev_mod / revision_results)
+ # - in the case of revisions to `endog`, should the revised model use
+ # the `previous` exog? or the `revised` exog?
# We need to figure out the out-of-sample exog, so that we can add back
# in the last exog, predicted state
exog = None
@@ -1070,34 +1078,16 @@ def _news_previous_results(self, previous, start, end, periods,
if self.model.k_exog > 0 and out_of_sample > 0:
exog = self.model.exog[-out_of_sample:]
- # We also need to manually compute the `revised` results,
- rev_endog = self.model.endog[:previous.nobs].copy()
- rev_endog[previous.filter_results.missing.astype(bool).T] = np.nan
- has_revisions = not np.allclose(rev_endog, previous.model.endog,
- equal_nan=True)
- revised_results = None
- if has_revisions:
- rev_exog = None
- if self.model.exog is not None:
- rev_exog = self.model.exog[:previous.nobs].copy()
- rev_mod = previous.model.clone(rev_endog, exog=rev_exog)
- revised = rev_mod.smooth(self.params)
-
# Compute the news
with contextlib.ExitStack() as stack:
stack.enter_context(previous.model._set_final_exog(exog))
stack.enter_context(previous._set_final_predicted_state(
exog, out_of_sample))
- if has_revisions:
- stack.enter_context(revised.model._set_final_exog(exog))
- stack.enter_context(revised._set_final_predicted_state(
- exog, out_of_sample))
- revised_results = revised.smoother_results
-
out = self.smoother_results.news(
previous.smoother_results, start=start, end=end,
- revised=revised_results, state_index=state_index)
+ revisions_details_start=revisions_details_start,
+ state_index=state_index)
return out
@Appender(MLEResults.summary.__doc__)
diff --git a/statsmodels/tsa/stattools.py b/statsmodels/tsa/stattools.py
index 6b30f217308..891c77634fd 100644
--- a/statsmodels/tsa/stattools.py
+++ b/statsmodels/tsa/stattools.py
@@ -8,7 +8,7 @@
from statsmodels.compat.python import Literal, lzip
from statsmodels.compat.scipy import _next_regular
-from typing import Tuple
+from typing import Union, List
import warnings
import numpy as np
@@ -40,6 +40,8 @@
from statsmodels.tsa.adfvalues import mackinnoncrit, mackinnonp
from statsmodels.tsa.tsatools import add_trend, lagmat, lagmat2ds
+ArrayLike1D = Union[np.ndarray, pd.Series, List[float]]
+
__all__ = [
"acovf",
"acf",
@@ -709,8 +711,11 @@ def acf(
else:
return acf, qstat, pvalue
-
-def pacf_yw(x, nlags=None, method="adjusted"):
+def pacf_yw(
+ x: ArrayLike1D,
+ nlags: int | None = None,
+ method: Literal["adjusted", "mle"] = "adjusted",
+) -> np.ndarray:
"""
Partial autocorrelation estimated with non-recursive yule_walker.
@@ -747,8 +752,7 @@ def pacf_yw(x, nlags=None, method="adjusted"):
nlags = int_like(nlags, "nlags", optional=True)
nobs = x.shape[0]
if nlags is None:
- nlags = min(int(10 * np.log10(nobs)), nobs - 1)
-
+ nlags = max(min(int(10 * np.log10(nobs)), nobs - 1), 1)
method = string_like(method, "method", options=("adjusted", "mle"))
pacf = [1.0]
with warnings.catch_warnings():
@@ -758,7 +762,9 @@ def pacf_yw(x, nlags=None, method="adjusted"):
return np.array(pacf)
-def pacf_burg(x, nlags=None, demean=True):
+def pacf_burg(
+ x: ArrayLike1D, nlags: int | None = None, demean: bool = True
+) -> tuple[np.ndarray, np.ndarray]:
"""
Calculate Burg"s partial autocorrelation estimator.
@@ -800,6 +806,7 @@ def pacf_burg(x, nlags=None, demean=True):
x = x - x.mean()
nobs = x.shape[0]
p = nlags if nlags is not None else min(int(10 * np.log10(nobs)), nobs - 1)
+ p = max(p, 1)
if p > nobs - 1:
raise ValueError("nlags must be smaller than nobs - 1")
d = np.zeros(p + 1)
@@ -818,14 +825,19 @@ def pacf_burg(x, nlags=None, demean=True):
v[1:] = last_v[1:] - pacf[i] * last_u[:-1]
d[i + 1] = (1 - pacf[i] ** 2) * d[i] - v[i] ** 2 - u[-1] ** 2
pacf[i + 1] = 2 / d[i + 1] * v[i + 1 :].dot(u[i:-1])
- sigma2 = (1 - pacf ** 2) * d / (2.0 * (nobs - np.arange(0, p + 1)))
+ sigma2 = (1 - pacf**2) * d / (2.0 * (nobs - np.arange(0, p + 1)))
pacf[0] = 1 # Insert the 0 lag partial autocorrel
return pacf, sigma2
@deprecate_kwarg("unbiased", "adjusted")
-def pacf_ols(x, nlags=None, efficient=True, adjusted=False):
+def pacf_ols(
+ x: ArrayLike1D,
+ nlags: int | None = None,
+ efficient: bool = True,
+ adjusted: bool = False,
+) -> np.ndarray:
"""
Calculate partial autocorrelations via OLS.
@@ -885,8 +897,9 @@ def pacf_ols(x, nlags=None, efficient=True, adjusted=False):
adjusted = bool_like(adjusted, "adjusted")
nobs = x.shape[0]
if nlags is None:
- nlags = min(int(10 * np.log10(nobs)), nobs - 1)
-
+ nlags = max(min(int(10 * np.log10(nobs)), nobs // 2), 1)
+ if nlags > nobs//2:
+ raise ValueError(f"nlags must be smaller than nobs // 2 ({nobs//2})")
pacf = np.empty(nlags + 1)
pacf[0] = 1.0
if efficient:
@@ -903,14 +916,30 @@ def pacf_ols(x, nlags=None, efficient=True, adjusted=False):
params = lstsq(xlags[:, :k], x0, rcond=None)[0]
# Last coefficient corresponds to PACF value (see [1])
pacf[k] = np.squeeze(params[-1])
-
if adjusted:
pacf *= nobs / (nobs - np.arange(nlags + 1))
-
return pacf
-def pacf(x, nlags=None, method="ywadjusted", alpha=None):
+def pacf(
+ x: ArrayLike1D,
+ nlags: int | None = None,
+ method: Literal[
+ "yw",
+ "ywadjusted",
+ "ols",
+ "ols-inefficient",
+ "ols-adjusted",
+ "ywm",
+ "ywmle",
+ "ld",
+ "ldadjusted",
+ "ldb",
+ "ldbiased",
+ "burg",
+ ] = "ywadjusted",
+ alpha: float | None = None,
+) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
"""
Partial autocorrelation estimate.
@@ -996,7 +1025,7 @@ def pacf(x, nlags=None, method="ywadjusted", alpha=None):
"ldb",
"ldbiased",
"ld_biased",
- "burg"
+ "burg",
)
x = array_like(x, "x", maxdim=2)
method = string_like(method, "method", options=methods)
@@ -1005,13 +1034,13 @@ def pacf(x, nlags=None, method="ywadjusted", alpha=None):
nobs = x.shape[0]
if nlags is None:
nlags = min(int(10 * np.log10(nobs)), nobs // 2 - 1)
- if nlags >= x.shape[0] // 2:
+ nlags = max(nlags, 1)
+ if nlags > x.shape[0] // 2:
raise ValueError(
"Can only compute partial correlations for lags up to 50% of the "
f"sample size. The requested nlags {nlags} must be < "
f"{x.shape[0] // 2}."
)
-
if method in ("ols", "ols-inefficient", "ols-adjusted"):
efficient = "inefficient" not in method
adjusted = "adjusted" in method
@@ -1031,7 +1060,6 @@ def pacf(x, nlags=None, method="ywadjusted", alpha=None):
acv = acovf(x, adjusted=False, fft=False)
ld_ = levinson_durbin(acv, nlags=nlags, isacov=True)
ret = ld_[2]
-
if alpha is not None:
varacf = 1.0 / len(x) # for all lags >=1
interval = stats.norm.ppf(1.0 - alpha / 2.0) * np.sqrt(varacf)
@@ -1538,7 +1566,7 @@ def grangercausalitytests(x, maxlag, addconst=True, verbose=None):
if x.shape[0] <= 3 * maxlag + int(addconst):
raise ValueError(
"Insufficient observations. Maximum allowable "
- "lag is {0}".format(int((x.shape[0] - int(addconst)) / 3) - 1)
+ "lag is {}".format(int((x.shape[0] - int(addconst)) / 3) - 1)
)
resli = {}
@@ -1945,7 +1973,7 @@ def kpss(
regression: Literal["c", "ct"] = "c",
nlags: Literal["auto", "legacy"] | int = "auto",
store: bool = False,
-) -> Tuple[float, float, int, dict[str, float]]:
+) -> tuple[float, float, int, dict[str, float]]:
"""
Kwiatkowski-Phillips-Schmidt-Shin test for stationarity.
@@ -2032,7 +2060,7 @@ def kpss(
# if m is not one, n != m * n
if nobs != x.size:
- raise ValueError("x of shape {0} not understood".format(x.shape))
+ raise ValueError(f"x of shape {x.shape} not understood")
if hypo == "ct":
# p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t,
@@ -2106,8 +2134,8 @@ def kpss(
rstore.nobs = nobs
stationary_type = "level" if hypo == "c" else "trend"
- rstore.H0 = "The series is {0} stationary".format(stationary_type)
- rstore.HA = "The series is not {0} stationary".format(stationary_type)
+ rstore.H0 = f"The series is {stationary_type} stationary"
+ rstore.HA = f"The series is not {stationary_type} stationary"
return kpss_stat, p_value, crit_dict, rstore
else:
@@ -2200,7 +2228,7 @@ def range_unit_root_test(x, store=False):
# if m is not one, n != m * n
if nobs != x.size:
- raise ValueError("x of shape {0} not understood".format(x.shape))
+ raise ValueError(f"x of shape {x.shape} not understood")
# Table from [1] has been replicated using 200,000 samples
# Critical values for new n_obs values have been identified
diff --git a/statsmodels/tsa/stl/tests/test_stl.py b/statsmodels/tsa/stl/tests/test_stl.py
index fc51365243d..5862dcfacdc 100644
--- a/statsmodels/tsa/stl/tests/test_stl.py
+++ b/statsmodels/tsa/stl/tests/test_stl.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import MONTH_END
+
import os
import pickle
@@ -274,7 +276,7 @@ def test_period_detection(default_kwargs):
del class_kwargs["period"]
endog = class_kwargs["endog"]
- index = pd.date_range("1-1-1959", periods=348, freq="M")
+ index = pd.date_range("1-1-1959", periods=348, freq=MONTH_END)
class_kwargs["endog"] = pd.Series(endog, index=index)
mod = STL(**class_kwargs)
@@ -336,6 +338,6 @@ def test_pickle(default_kwargs):
def test_squezable_to_1d():
data = co2.load().data
- data = data.resample("M").mean().ffill()
+ data = data.resample(MONTH_END).mean().ffill()
res = STL(data).fit()
assert isinstance(res, DecomposeResult)
diff --git a/statsmodels/tsa/tests/test_ar.py b/statsmodels/tsa/tests/test_ar.py
index 161ce0c6522..f80b3ca05c5 100644
--- a/statsmodels/tsa/tests/test_ar.py
+++ b/statsmodels/tsa/tests/test_ar.py
@@ -1,11 +1,12 @@
"""
Test AR Model
"""
+from statsmodels.compat.pandas import MONTH_END
from statsmodels.compat.pytest import pytest_warns
-from typing import NamedTuple, Union
import datetime as dt
from itertools import product
+from typing import NamedTuple, Union
import numpy as np
from numpy.testing import assert_allclose, assert_almost_equal
@@ -40,7 +41,7 @@
def gen_ar_data(nobs):
rs = np.random.RandomState(982739)
- idx = pd.date_range(dt.datetime(1900, 1, 1), freq="M", periods=nobs)
+ idx = pd.date_range(dt.datetime(1900, 1, 1), freq=MONTH_END, periods=nobs)
return pd.Series(rs.standard_normal(nobs), index=idx), rs
@@ -273,7 +274,7 @@ def gen_data(nobs, nexog, pandas, seed=92874765):
exog = rs.standard_normal((nobs, nexog)) if nexog else None
if pandas:
index = pd.date_range(
- dt.datetime(1999, 12, 31), periods=nobs, freq="M"
+ dt.datetime(1999, 12, 31), periods=nobs, freq=MONTH_END
)
endog = pd.Series(endog, name="endog", index=index)
if nexog:
@@ -722,7 +723,9 @@ def test_ar_order_select():
y = arma_generate_sample([1, -0.75, 0.3], [1], 100)
ts = Series(
y,
- index=date_range(start=dt.datetime(1990, 1, 1), periods=100, freq="M"),
+ index=date_range(
+ start=dt.datetime(1990, 1, 1), periods=100, freq=MONTH_END
+ ),
)
res = ar_select_order(ts, maxlag=12, ic="aic")
assert tuple(res.ar_lags) == (1, 2)
@@ -808,7 +811,7 @@ def test_equiv_dynamic(reset_randomstate):
res = mod.fit()
pred0 = res.predict(500, 800, dynamic=0)
pred1 = res.predict(500, 800, dynamic=True)
- idx = pd.date_range(dt.datetime(2000, 1, 30), periods=1001, freq="M")
+ idx = pd.date_range(dt.datetime(2000, 1, 30), periods=1001, freq=MONTH_END)
y = pd.Series(y, index=idx)
mod = AutoReg(y, 1)
res = mod.fit()
@@ -853,8 +856,12 @@ def test_predict_seasonal():
for i in range(1, 1001):
y[i] = 10 + 0.9 * y[i - 1] + e[i] + effects[i % 12]
ys = pd.Series(
- y, index=pd.date_range(dt.datetime(1950, 1, 1), periods=1001, freq="M")
+ y,
+ index=pd.date_range(
+ dt.datetime(1950, 1, 1), periods=1001, freq=MONTH_END
+ ),
)
+
mod = AutoReg(ys, 1, seasonal=True)
res = mod.fit()
c = res.params.iloc[0]
@@ -867,14 +874,14 @@ def test_predict_seasonal():
for i in range(1, 201):
direct[i] = direct[i - 1] * ar + c + seasons[(900 + i) % 12]
direct = pd.Series(
- direct, index=pd.date_range(ys.index[900], periods=201, freq="M")
+ direct, index=pd.date_range(ys.index[900], periods=201, freq=MONTH_END)
)
assert_series_equal(pred, direct)
pred = res.predict(900, dynamic=False)
direct = y[899:-1] * ar + c + seasons[np.arange(900, 1001) % 12]
direct = pd.Series(
- direct, index=pd.date_range(ys.index[900], periods=101, freq="M")
+ direct, index=pd.date_range(ys.index[900], periods=101, freq=MONTH_END)
)
assert_series_equal(pred, direct)
@@ -888,7 +895,10 @@ def test_predict_exog():
for i in range(3, 1001):
y[i] = 10 + 0.9 * y[i - 1] - 0.5 * y[i - 3] + e[i] + x[i].sum()
ys = pd.Series(
- y, index=pd.date_range(dt.datetime(1950, 1, 1), periods=1001, freq="M")
+ y,
+ index=pd.date_range(
+ dt.datetime(1950, 1, 1), periods=1001, freq=MONTH_END
+ ),
)
xdf = pd.DataFrame(x, columns=["x0", "x1"], index=ys.index)
mod = AutoReg(ys, [1, 3], trend="c", exog=xdf)
@@ -903,7 +913,7 @@ def test_predict_exog():
phi_2 = ar.iloc[1]
direct = c + phi_1 * y[899:-1] + phi_2 * y[897:-3]
direct += ex[0] * x[900:, 0] + ex[1] * x[900:, 1]
- idx = pd.date_range(ys.index[900], periods=101, freq="M")
+ idx = pd.date_range(ys.index[900], periods=101, freq=MONTH_END)
direct = pd.Series(direct, index=idx)
assert_series_equal(pred, direct)
exog_oos = rs.standard_normal((100, 2))
@@ -923,7 +933,7 @@ def test_predict_exog():
direct[i] += exog_oos[i - 101] @ ex
direct = pd.Series(
- direct, index=pd.date_range(ys.index[900], periods=201, freq="M")
+ direct, index=pd.date_range(ys.index[900], periods=201, freq=MONTH_END)
)
assert_series_equal(pred, direct)
@@ -936,7 +946,10 @@ def test_predict_irregular_ar():
for i in range(3, 1001):
y[i] = 10 + 0.9 * y[i - 1] - 0.5 * y[i - 3] + e[i]
ys = pd.Series(
- y, index=pd.date_range(dt.datetime(1950, 1, 1), periods=1001, freq="M")
+ y,
+ index=pd.date_range(
+ dt.datetime(1950, 1, 1), periods=1001, freq=MONTH_END
+ )
)
mod = AutoReg(ys, [1, 3], trend="ct")
res = mod.fit()
@@ -954,7 +967,7 @@ def test_predict_irregular_ar():
c + t * (901 + i) + ar[0] * direct[i - 1] + ar[1] * direct[i - 3]
)
direct = pd.Series(
- direct, index=pd.date_range(ys.index[900], periods=201, freq="M")
+ direct, index=pd.date_range(ys.index[900], periods=201, freq=MONTH_END)
)
assert_series_equal(pred, direct)
@@ -965,7 +978,7 @@ def test_predict_irregular_ar():
+ ar[0] * y[899:-1]
+ ar[1] * y[897:-3]
)
- idx = pd.date_range(ys.index[900], periods=101, freq="M")
+ idx = pd.date_range(ys.index[900], periods=101, freq=MONTH_END)
direct = pd.Series(direct, index=idx)
assert_series_equal(pred, direct)
@@ -980,12 +993,20 @@ def test_forecast_start_end_equiv(dynamic):
for i in range(1, 1001):
y[i] = 10 + 0.9 * y[i - 1] + e[i] + effects[i % 12]
ys = pd.Series(
- y, index=pd.date_range(dt.datetime(1950, 1, 1), periods=1001, freq="M")
+ y, index=pd.date_range(
+ dt.datetime(1950, 1, 1),
+ periods=1001,
+ freq=MONTH_END
+ )
)
mod = AutoReg(ys, 1, seasonal=True)
res = mod.fit()
pred_int = res.predict(1000, 1020, dynamic=dynamic)
- dates = pd.date_range(dt.datetime(1950, 1, 1), periods=1021, freq="M")
+ dates = pd.date_range(
+ dt.datetime(1950, 1, 1),
+ periods=1021,
+ freq=MONTH_END
+ )
pred_dates = res.predict(dates[1000], dates[1020], dynamic=dynamic)
assert_series_equal(pred_int, pred_dates)
@@ -1057,7 +1078,7 @@ def test_autoreg_plot_err():
def test_autoreg_resids():
- idx = pd.date_range(dt.datetime(1900, 1, 1), periods=250, freq="M")
+ idx = pd.date_range(dt.datetime(1900, 1, 1), periods=250, freq=MONTH_END)
rs = np.random.RandomState(0)
idx_dates = sorted(rs.choice(idx, size=100, replace=False))
e = rs.standard_normal(250)
@@ -1259,7 +1280,7 @@ def append_data():
x_oos = rs.standard_normal((10, 3))
y_oos = rs.standard_normal(10)
index = pd.date_range(
- "2020-1-1", periods=y.shape[0] + y_oos.shape[0], freq="M"
+ "2020-1-1", periods=y.shape[0] + y_oos.shape[0], freq=MONTH_END
)
y = pd.Series(y, index=index[: y.shape[0]], name="y")
x = pd.DataFrame(
diff --git a/statsmodels/tsa/tests/test_arima_process.py b/statsmodels/tsa/tests/test_arima_process.py
index 3dd8bfb5dd6..af1b3f90a3c 100644
--- a/statsmodels/tsa/tests/test_arima_process.py
+++ b/statsmodels/tsa/tests/test_arima_process.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import QUARTER_END
+
import datetime as dt
import numpy as np
@@ -474,7 +476,7 @@ def test_from_estimation(d, seasonal):
ar = [0.8] if not seasonal else [0.8, 0, 0, 0.2, -0.16]
ma = [0.4] if not seasonal else [0.4, 0, 0, 0.2, -0.08]
ap = ArmaProcess.from_coeffs(ar, ma, 500)
- idx = pd.date_range(dt.datetime(1900, 1, 1), periods=500, freq="Q")
+ idx = pd.date_range(dt.datetime(1900, 1, 1), periods=500, freq=QUARTER_END)
data = ap.generate_sample(500)
if d == 1:
data = np.cumsum(data)
diff --git a/statsmodels/tsa/tests/test_deterministic.py b/statsmodels/tsa/tests/test_deterministic.py
index d63e58ac047..2c31cd7e41a 100644
--- a/statsmodels/tsa/tests/test_deterministic.py
+++ b/statsmodels/tsa/tests/test_deterministic.py
@@ -1,4 +1,10 @@
-from statsmodels.compat.pandas import PD_LT_1_0_0, is_int_index
+from statsmodels.compat.pandas import (
+ MONTH_END,
+ PD_LT_1_0_0,
+ QUARTER_END,
+ YEAR_END,
+ is_int_index,
+)
from statsmodels.compat.pytest import pytest_warns
from typing import Hashable, Tuple
@@ -30,7 +36,7 @@ def time_index(request):
def index(request):
param = request.param
if param in ("period", "datetime"):
- idx = pd.date_range("2000-01-01", periods=137, freq="M")
+ idx = pd.date_range("2000-01-01", periods=137, freq=MONTH_END)
if param == "period":
idx = idx.to_period("M")
elif param == "range":
@@ -145,7 +151,7 @@ def test_fourier_smoke(index, forecast_index):
@pytest.mark.smoke
def test_calendar_time_trend_smoke(time_index, forecast_index):
- ct = CalendarTimeTrend("A", order=2)
+ ct = CalendarTimeTrend(YEAR_END, order=2)
ct.in_sample(time_index)
steps = 83 if forecast_index is None else len(forecast_index)
ct.out_of_sample(steps, time_index, forecast_index)
@@ -159,7 +165,7 @@ def test_calendar_time_trend_smoke(time_index, forecast_index):
@pytest.mark.smoke
def test_calendar_fourier_smoke(time_index, forecast_index):
- cf = CalendarFourier("A", 2)
+ cf = CalendarFourier(YEAR_END, 2)
cf.in_sample(time_index)
steps = 83 if forecast_index is None else len(forecast_index)
cf.out_of_sample(steps, time_index, forecast_index)
@@ -192,14 +198,14 @@ def test_calendar_seasonality(time_index, forecast_index, freq_period):
def test_forbidden_index():
index = pd.RangeIndex(0, 10)
- ct = CalendarTimeTrend("A", order=2)
+ ct = CalendarTimeTrend(YEAR_END, order=2)
with pytest.raises(TypeError, match="CalendarTimeTrend terms can only"):
ct.in_sample(index)
def test_calendar_time_trend_base(time_index):
- ct = CalendarTimeTrend("M", True, order=3, base_period="1960-1-1")
- ct2 = CalendarTimeTrend("M", True, order=3)
+ ct = CalendarTimeTrend(MONTH_END, True, order=3, base_period="1960-1-1")
+ ct2 = CalendarTimeTrend(MONTH_END, True, order=3)
assert ct != ct2
str(ct)
str(ct2)
@@ -209,14 +215,14 @@ def test_calendar_time_trend_base(time_index):
def test_invalid_freq_period(time_index):
with pytest.raises(ValueError, match="The combination of freq="):
- CalendarSeasonality("H", "A")
+ CalendarSeasonality("h", YEAR_END)
cs = CalendarSeasonality("B", "W")
with pytest.raises(ValueError, match="freq is B but index contains"):
cs.in_sample(pd.date_range("2000-1-1", periods=10, freq="D"))
def test_check_index_type():
- ct = CalendarTimeTrend("A", True, order=3)
+ ct = CalendarTimeTrend(YEAR_END, True, order=3)
idx = pd.RangeIndex(0, 20)
with pytest.raises(TypeError, match="CalendarTimeTrend terms can only"):
ct._check_index_type(idx, pd.DatetimeIndex)
@@ -429,8 +435,8 @@ def test_calendar_time_trend(reset_randomstate):
def test_calendar_seasonal_period_w():
period = "W"
- index = pd.date_range("2000-01-03", freq="H", periods=600)
- cs = CalendarSeasonality("H", period=period)
+ index = pd.date_range("2000-01-03", freq="h", periods=600)
+ cs = CalendarSeasonality("h", period=period)
terms = cs.in_sample(index)
assert np.all(terms.sum(1) == 1.0)
for i in range(index.shape[0]):
@@ -453,8 +459,8 @@ def test_calendar_seasonal_period_w():
def test_calendar_seasonal_period_d():
period = "D"
- index = pd.date_range("2000-01-03", freq="H", periods=600)
- cs = CalendarSeasonality("H", period=period)
+ index = pd.date_range("2000-01-03", freq="h", periods=600)
+ cs = CalendarSeasonality("h", period=period)
terms = cs.in_sample(index)
assert np.all(terms.sum(1) == 1.0)
for i in range(index.shape[0]):
@@ -463,8 +469,8 @@ def test_calendar_seasonal_period_d():
def test_calendar_seasonal_period_q():
period = "Q"
- index = pd.date_range("2000-01-01", freq="M", periods=600)
- cs = CalendarSeasonality("M", period=period)
+ index = pd.date_range("2000-01-01", freq=MONTH_END, periods=600)
+ cs = CalendarSeasonality(MONTH_END, period=period)
terms = cs.in_sample(index)
assert np.all(terms.sum(1) == 1.0)
for i in range(index.shape[0]):
@@ -472,15 +478,15 @@ def test_calendar_seasonal_period_q():
def test_calendar_seasonal_period_a():
- period = "A"
- index = pd.date_range("2000-01-01", freq="M", periods=600)
- cs = CalendarSeasonality("M", period=period)
+ period = "Y"
+ index = pd.date_range("2000-01-01", freq=MONTH_END, periods=600)
+ cs = CalendarSeasonality(MONTH_END, period=period)
terms = cs.in_sample(index)
assert np.all(terms.sum(1) == 1.0)
for i in range(index.shape[0]):
assert terms.iloc[i, i % 12] == 1.0
- cs = CalendarSeasonality("Q", period=period)
+ cs = CalendarSeasonality(QUARTER_END, period=period)
terms = cs.in_sample(index)
assert np.all(terms.sum(1) == 1.0)
for i in range(index.shape[0]):
@@ -530,7 +536,7 @@ def test_range_error():
def test_range_index_basic():
- idx = pd.date_range("2000-1-1", freq="M", periods=120)
+ idx = pd.date_range("2000-1-1", freq=MONTH_END, periods=120)
dp = DeterministicProcess(idx, constant=True, order=1, seasonal=True)
dp.range("2001-1-1", "2008-1-1")
dp.range("2001-1-1", "2015-1-1")
diff --git a/statsmodels/tsa/tests/test_exponential_smoothing.py b/statsmodels/tsa/tests/test_exponential_smoothing.py
index af3030b4544..dad5b53696e 100644
--- a/statsmodels/tsa/tests/test_exponential_smoothing.py
+++ b/statsmodels/tsa/tests/test_exponential_smoothing.py
@@ -1,6 +1,7 @@
"""
Author: Samuel Scherrer
"""
+from statsmodels.compat.pandas import QUARTER_END
from statsmodels.compat.platform import PLATFORM_LINUX32, PLATFORM_WIN
from itertools import product
@@ -220,7 +221,7 @@ def austourists():
61.09776802,
66.05576122,
]
- index = pd.date_range("1999-01-01", "2015-12-31", freq="Q")
+ index = pd.date_range("1999-01-01", "2015-12-31", freq=QUARTER_END)
return pd.Series(data, index)
diff --git a/statsmodels/tsa/tests/test_seasonal.py b/statsmodels/tsa/tests/test_seasonal.py
index 3fab8d7caa2..2a1f0b630a7 100644
--- a/statsmodels/tsa/tests/test_seasonal.py
+++ b/statsmodels/tsa/tests/test_seasonal.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import MONTH_END, QUARTER_END, YEAR_END
+
import numpy as np
from numpy.testing import (
assert_allclose,
@@ -54,7 +56,7 @@ def setup_class(cls):
232, 429, 3, 98, 43, -141, -77, -13, 125, 361, -45, 184]
cls.data = pd.DataFrame(data, pd.date_range(start='1/1/1951',
periods=len(data),
- freq='Q'))
+ freq=QUARTER_END))
def test_ndarray(self):
res_add = seasonal_decompose(self.data.values, period=4)
@@ -93,7 +95,7 @@ def test_pandas(self):
res_add = seasonal_decompose(self.data, period=4)
freq_override_data = self.data.copy()
freq_override_data.index = pd.date_range(
- start='1/1/1951', periods=len(freq_override_data), freq='Y')
+ start='1/1/1951', periods=len(freq_override_data), freq=YEAR_END)
res_add_override = seasonal_decompose(freq_override_data, period=4)
assert_almost_equal(res_add.seasonal.values.squeeze(), SEASONAL, 2)
@@ -270,14 +272,14 @@ def test_raises(self):
def test_seasonal_decompose_too_short(reset_randomstate):
- dates = pd.date_range('2000-01-31', periods=4, freq='Q')
+ dates = pd.date_range('2000-01-31', periods=4, freq=QUARTER_END)
y = np.sin(np.arange(4) / 4 * 2 * np.pi)
y += np.random.standard_normal(y.size)
y = pd.Series(y, name='y', index=dates)
with pytest.raises(ValueError):
seasonal_decompose(y)
- dates = pd.date_range('2000-01-31', periods=12, freq='M')
+ dates = pd.date_range('2000-01-31', periods=12, freq=MONTH_END)
y = np.sin(np.arange(12) / 12 * 2 * np.pi)
y += np.random.standard_normal(y.size)
y = pd.Series(y, name='y', index=dates)
@@ -296,7 +298,7 @@ def test_seasonal_decompose_smoke():
data = pd.DataFrame(x, pd.date_range(start='1/1/1951',
periods=len(x),
- freq='Q'))
+ freq=QUARTER_END))
seasonal_decompose(data)
diff --git a/statsmodels/tsa/tests/test_stattools.py b/statsmodels/tsa/tests/test_stattools.py
index ba53aa8d7b2..abbc071810f 100644
--- a/statsmodels/tsa/tests/test_stattools.py
+++ b/statsmodels/tsa/tests/test_stattools.py
@@ -1,5 +1,5 @@
from statsmodels.compat.numpy import lstsq
-from statsmodels.compat.pandas import assert_index_equal
+from statsmodels.compat.pandas import MONTH_END, YEAR_END, assert_index_equal
from statsmodels.compat.platform import PLATFORM_WIN
from statsmodels.compat.python import lrange
@@ -17,8 +17,8 @@
import pandas as pd
from pandas import DataFrame, Series, date_range
import pytest
-from scipy.interpolate import interp1d
from scipy import stats
+from scipy.interpolate import interp1d
from statsmodels.datasets import macrodata, modechoice, nile, randhie, sunspots
from statsmodels.tools.sm_exceptions import (
@@ -38,8 +38,8 @@
adfuller,
arma_order_select_ic,
breakvar_heteroskedasticity_test,
- ccovf,
ccf,
+ ccovf,
coint,
grangercausalitytests,
innovations_algo,
@@ -1034,7 +1034,7 @@ def test_pandasacovf():
def test_acovf2d(reset_randomstate):
dta = sunspots.load_pandas().data
- dta.index = date_range(start="1700", end="2009", freq="Y")[:309]
+ dta.index = date_range(start="1700", end="2009", freq=YEAR_END)[:309]
del dta["YEAR"]
res = acovf(dta, fft=False)
assert_equal(res, acovf(dta.values, fft=False))
@@ -1119,7 +1119,7 @@ def test_arma_order_select_ic():
assert_(res.bic.index.equals(bic.index))
assert_(res.bic.columns.equals(bic.columns))
- index = pd.date_range("2000-1-1", freq="M", periods=len(y))
+ index = pd.date_range("2000-1-1", freq=MONTH_END, periods=len(y))
y_series = pd.Series(y, index=index)
res_pd = arma_order_select_ic(
y_series, max_ar=2, max_ma=1, ic=["aic", "bic"], trend="n"
@@ -1533,7 +1533,7 @@ def test_acf_conservate_nanops(reset_randomstate):
def test_pacf_nlags_error(reset_randomstate):
- e = np.random.standard_normal(100)
+ e = np.random.standard_normal(99)
with pytest.raises(ValueError, match="Can only compute partial"):
pacf(e, 50)
@@ -1584,3 +1584,27 @@ def test_granger_causality_exception_maxlag(gc_data):
def test_granger_causality_verbose(gc_data):
with pytest.warns(FutureWarning, match="verbose"):
grangercausalitytests(gc_data, 3, verbose=True)
+
+@pytest.mark.parametrize("size",[3,5,7,9])
+def test_pacf_small_sample(size,reset_randomstate):
+ y = np.random.standard_normal(size)
+ a = pacf(y)
+ assert isinstance(a, np.ndarray)
+ a, b = pacf_burg(y)
+ assert isinstance(a, np.ndarray)
+ assert isinstance(b, np.ndarray)
+ a = pacf_ols(y)
+ assert isinstance(a, np.ndarray)
+ a = pacf_yw(y)
+ assert isinstance(a, np.ndarray)
+
+
+def test_pacf_1_obs(reset_randomstate):
+ y = np.random.standard_normal(1)
+ with pytest.raises(ValueError):
+ pacf(y)
+ with pytest.raises(ValueError):
+ pacf_burg(y)
+ with pytest.raises(ValueError):
+ pacf_ols(y)
+ pacf_yw(y)
diff --git a/statsmodels/tsa/tests/test_tsa_tools.py b/statsmodels/tsa/tests/test_tsa_tools.py
index e4808496551..3d32e60d630 100644
--- a/statsmodels/tsa/tests/test_tsa_tools.py
+++ b/statsmodels/tsa/tests/test_tsa_tools.py
@@ -1,7 +1,13 @@
"""tests for some time series analysis functions
"""
-from statsmodels.compat.pandas import assert_frame_equal, assert_series_equal
+from statsmodels.compat.pandas import (
+ PD_LT_2_2_0,
+ QUARTER_END,
+ YEAR_END,
+ assert_frame_equal,
+ assert_series_equal,
+)
import numpy as np
from numpy.testing import (
@@ -11,6 +17,7 @@
assert_raises,
)
import pandas as pd
+from pandas.tseries.frequencies import to_offset
import pytest
from statsmodels import regression
@@ -465,14 +472,28 @@ def test_duplicate_column_names(self):
stattools.lagmat(df, maxlag=2, use_pandas=True)
-def test_freq_to_period():
- from pandas.tseries.frequencies import to_offset
-
- freqs = ["A", "AS-MAR", "Q", "QS", "QS-APR", "W", "W-MON", "B", "D", "H"]
- expected = [1, 1, 4, 4, 4, 52, 52, 5, 7, 24]
- for i, j in zip(freqs, expected):
- assert_equal(tools.freq_to_period(i), j)
- assert_equal(tools.freq_to_period(to_offset(i)), j)
+ANNUAL = "A" if PD_LT_2_2_0 else YEAR_END
+freqs = [
+ YEAR_END,
+ f"{ANNUAL}-MAR",
+ QUARTER_END,
+ "QS",
+ "QS-APR",
+ "W",
+ "W-MON",
+ "B",
+ "D",
+ "h",
+]
+expected = [1, 1, 4, 4, 4, 52, 52, 5, 7, 24]
+freq_expected = [(f, e) for f, e in zip(freqs, expected)]
+
+
+@pytest.mark.parametrize("freq_expected", freq_expected)
+def test_freq_to_period(freq_expected):
+ freq, expected = freq_expected
+ assert_equal(tools.freq_to_period(freq), expected)
+ assert_equal(tools.freq_to_period(to_offset(freq)), expected)
class TestDetrend:
diff --git a/statsmodels/tsa/tests/test_x13.py b/statsmodels/tsa/tests/test_x13.py
index b6de96ad53a..357b7881c88 100644
--- a/statsmodels/tsa/tests/test_x13.py
+++ b/statsmodels/tsa/tests/test_x13.py
@@ -1,3 +1,5 @@
+from statsmodels.compat.pandas import MONTH_END
+
import pandas as pd
import pytest
@@ -20,7 +22,7 @@
dta = co2.load_pandas().data
dta['co2'] = dta.co2.interpolate()
-monthly_data = dta.resample('M')
+monthly_data = dta.resample(MONTH_END)
# change in pandas 0.18 resample is deferred object
if not isinstance(monthly_data, (pd.DataFrame, pd.Series)):
monthly_data = monthly_data.mean()
diff --git a/statsmodels/tsa/tsatools.py b/statsmodels/tsa/tsatools.py
index 5e3e9a293ab..048b56a2e05 100644
--- a/statsmodels/tsa/tsatools.py
+++ b/statsmodels/tsa/tsatools.py
@@ -1,6 +1,6 @@
from __future__ import annotations
-from statsmodels.compat.python import lrange, Literal
+from statsmodels.compat.python import Literal, lrange
import warnings
@@ -804,7 +804,8 @@ def freq_to_period(freq: str | offsets.DateOffset) -> int:
assert isinstance(freq, offsets.DateOffset)
freq = freq.rule_code.upper()
- if freq in ("A", "Y") or freq.startswith(("A-", "AS-", "Y-", "YS-")):
+ yearly_freqs = ("A-", "AS-", "Y-", "YS-", "YE-")
+ if freq in ("A", "Y") or freq.startswith(yearly_freqs):
return 1
elif freq == "Q" or freq.startswith(("Q-", "QS", "QE")):
return 4
diff --git a/statsmodels/tsa/vector_ar/tests/test_var.py b/statsmodels/tsa/vector_ar/tests/test_var.py
index 6fc0b39151c..aba11565a4e 100644
--- a/statsmodels/tsa/vector_ar/tests/test_var.py
+++ b/statsmodels/tsa/vector_ar/tests/test_var.py
@@ -2,7 +2,7 @@
"""
Test VAR Model
"""
-from statsmodels.compat.pandas import assert_index_equal
+from statsmodels.compat.pandas import QUARTER_END, assert_index_equal
from statsmodels.compat.python import lrange
from io import BytesIO, StringIO
@@ -600,7 +600,7 @@ def setup_class(cls):
adj_data = np.diff(np.log(data), axis=0)
# est = VAR(adj_data, p=2, dates=dates[1:], names=names)
- cls.model = VAR(adj_data[:-16], dates=dates[1:-16], freq="BQ-MAR")
+ cls.model = VAR(adj_data[:-16], dates=dates[1:-16], freq=f"B{QUARTER_END}-MAR")
cls.res = cls.model.fit(maxlags=cls.p)
cls.irf = cls.res.irf(10)
cls.lut = E1_Results()
diff --git a/tools/ci/azure/azure_template_posix.yml b/tools/ci/azure/azure_template_posix.yml
index 5c383dfa756..73a37c7d61c 100644
--- a/tools/ci/azure/azure_template_posix.yml
+++ b/tools/ci/azure/azure_template_posix.yml
@@ -17,9 +17,14 @@ jobs:
strategy:
matrix:
${{ if eq(parameters.name, 'Linux') }}:
+ python_312_latest:
+ python.version: '3.12'
+ coverage: true
python_311_latest:
python.version: '3.11'
lint: true
+ CYTHON: 3.0.6
+ coverage: true
python_310_latest_install:
python.version: '3.10'
lint: true
@@ -35,6 +40,8 @@ jobs:
SCIPY: 1.5.4
PANDAS: 1.2.5
NUMPY: 1.20.3
+ CYTHON: 0.29.33
+ coverage: true
lint: true
python38:
python.version: '3.8'
@@ -45,6 +52,7 @@ jobs:
MATPLOTLIB: 3.2.2
coverage: true
PYTEST_OPTIONS: ''
+ CYTHON: 0.29.36
lint: true
python_38_legacy_blas:
python.version: '3.8'
@@ -70,8 +78,12 @@ jobs:
pip.pre: true
SM_TEST_COPY_ON_WRITE: 1
${{ if eq(parameters.name, 'macOS') }}:
+ python310_macos_latest:
+ python.version: '3.10'
python311_macos_latest:
python.version: '3.11'
+ python312_macos_latest:
+ python.version: '3.12'
maxParallel: 10
steps:
@@ -109,7 +121,7 @@ jobs:
condition: eq(variables['test.install'], 'true')
- script: |
- python -m pip install -e . -v --no-build-isolation
+ python -m pip install -e . -vv --no-build-isolation
displayName: 'Install statsmodels (editable)'
condition: ne(variables['test.install'], 'true')
@@ -128,6 +140,9 @@ jobs:
mkdir test_run_dir
pushd test_run_dir
python -c "import statsmodels; statsmodels.test(['-n', 'auto', '--junitxml=../junit/test-results.xml', '--skip-examples', '--dist', 'loadscope', '--durations=25'], exit=True)"
+ LAST_EXIT_CODE=$?
+ echo "Status code: ${LAST_EXIT_CODE}"
+ exit ${LAST_EXIT_CODE}
popd
displayName: 'Run tests (site-packages)'
condition: eq(variables['test.install'], 'true')
diff --git a/tools/ci/azure/azure_template_windows.yml b/tools/ci/azure/azure_template_windows.yml
index 9062f03c3cf..d34132a9c56 100644
--- a/tools/ci/azure/azure_template_windows.yml
+++ b/tools/ci/azure/azure_template_windows.yml
@@ -28,6 +28,9 @@ jobs:
python311_win_latest:
python.version: '3.11'
python.architecture: 'x64'
+ python312_win_latest:
+ python.version: '3.12'
+ python.architecture: 'x64'
maxParallel: 10
steps:
diff --git a/tools/ci/azure/install-posix.sh b/tools/ci/azure/install-posix.sh
index da701ba44c6..e5ac78afa66 100644
--- a/tools/ci/azure/install-posix.sh
+++ b/tools/ci/azure/install-posix.sh
@@ -14,20 +14,27 @@ else
CMD="python -m pip install numpy"
fi
+echo "Python location: $(where python)"
python -m pip install --upgrade pip setuptools wheel build
-python -m pip install "cython>=0.29.28,<3.0.0" "pytest~=7.0.1" pytest-xdist coverage pytest-cov ipython jupyter notebook nbconvert "property_cached>=1.6.3" black==20.8b1 isort flake8 nbconvert==5.6.1 coveralls setuptools_scm[toml]~=7.0.0
+python -m pip install -r requirements-dev.txt
+python -m pip uninstall numpy scipy pandas cython -y
if [[ -n ${NUMPY} ]]; then CMD="$CMD==${NUMPY}"; fi;
CMD="$CMD scipy"
if [[ -n ${SCIPY} ]]; then CMD="$CMD==${SCIPY}"; fi;
CMD="$CMD pandas"
if [[ -n ${PANDAS} ]]; then CMD="$CMD==${PANDAS}"; fi;
+CMD="$CMD cython"
+if [[ -n ${CYTHON} ]]; then CMD="$CMD==${CYTHON}"; fi;
if [[ ${USE_MATPLOTLIB} == true ]]; then
CMD="$CMD matplotlib"
if [[ -n ${MATPLOTLIB} ]]; then
CMD="$CMD==${MATPLOTLIB}";
fi
+else
+ # Uninstall if not needed
+ python -m pip uninstall matplotlib -y || true
fi
CMD="${CMD} patsy ${BLAS}"
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 72bbd0cdecd..00000000000
--- a/tox.ini
+++ /dev/null
@@ -1,67 +0,0 @@
-##############################################################################
-# Intended to simplify local generation of coverage of Cython files
-#
-# tox -e cython-coverage
-#
-# will produce a HTML report showing line-by-line coverage
-#
-# can be used to run standard tests on Python 2.7, 3.5, 3.6, and 3.7 or a
-# run with coverage but without coverage of Cython files using
-#
-# tox -e ENV
-#
-# where env is one of the values in envlist or all environments (slow)
-#
-# tox
-#
-##############################################################################
-[tox]
-envlist =
- py38
- py39
- py310
- coverage
- cython-coverage
-
-[testenv]
-deps =
- -rrequirements.txt
- cython
- matplotlib
- joblib
- pytest>=6
- pytest-xdist
-setenv =
- MKL_NUM_THREADS=1
- NUMEXPR_NUM_THREADS=1
- OMP_NUM_THREADS=1
- OPENBLAS_NUM_THREADS=1
-changedir = {homedir}
-commands =
- python -c "import statsmodels;statsmodels.test(['-n 2'], exit=True)"
-
-[testenv:coverage]
-usedevelop = True
-deps =
- {[testenv]deps}
- pytest-cov
-setenv =
- {[testenv]setenv}
- SM_CYTHON_COVERAGE=false
-changedir = {toxinidir}
-commands =
- coverage erase
- pytest -n 2 --cov-report html --cov=statsmodels statsmodels
-
-[testenv:cython-coverage]
-usedevelop = True
-deps =
- {[testenv]deps}
- pytest-cov
-setenv =
- {[testenv]setenv}
- SM_CYTHON_COVERAGE=true
-changedir = {toxinidir}
-commands =
- coverage erase
- pytest -n 2 --cov-report html --cov=statsmodels statsmodels