Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Adjust eval environment for stack depth #9047

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 5 additions & 8 deletions statsmodels/base/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from statsmodels.base.optimizer import Optimizer
import statsmodels.base.wrapper as wrap
from statsmodels.formula import handle_formula_data
from statsmodels.formula.formulatools import advance_eval_env
from statsmodels.stats.contrast import (
ContrastResults,
WaldTestResults,
Expand Down Expand Up @@ -188,14 +189,10 @@ def from_formula(cls, formula, data, subset=None, drop_cols=None,
# TODO: subset could use syntax. issue #469.
if subset is not None:
data = data.loc[subset]
eval_env = kwargs.pop('eval_env', None)
if eval_env is None:
eval_env = 2
elif eval_env == -1:
from patsy import EvalEnvironment
eval_env = EvalEnvironment({})
elif isinstance(eval_env, int):
eval_env += 1 # we're going down the stack again

advance_eval_env(kwargs)
eval_env = kwargs.pop('eval_env')

missing = kwargs.get('missing', 'drop')
if missing == 'none': # with patsy it's drop or raise. let's raise.
missing = 'raise'
Expand Down
2 changes: 2 additions & 0 deletions statsmodels/discrete/conditional_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import statsmodels.base.wrapper as wrap
from statsmodels.discrete.discrete_model import (MultinomialResults,
MultinomialResultsWrapper)
from statsmodels.formula.formulatools import advance_eval_env
import collections
import warnings
import itertools
Expand Down Expand Up @@ -204,6 +205,7 @@ def from_formula(cls,
if "0+" not in formula.replace(" ", ""):
warnings.warn("Conditional models should not include an intercept")

advance_eval_env(kwargs)
model = super(_ConditionalModel, cls).from_formula(
formula, data=data, groups=groups, *args, **kwargs)

Expand Down
25 changes: 25 additions & 0 deletions statsmodels/discrete/tests/test_conditional.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,31 @@ def test_logit_1d():
assert_allclose(result.bse, np.r_[1.295155], rtol=1e-5)


def test_logit_formula():
    """ConditionalLogit formulas should resolve names from the caller's scope."""

    def times_two(x):
        return 2 * x

    groups = np.repeat([0, 1], 50)
    exog = np.linspace(-2, 2, len(groups))

    # Deterministic perturbation so each group has within-group variance.
    error = np.linspace(-1, 1, len(groups))
    logit_link = 1 / (1 + np.exp(exog + groups)) + error
    endog = (logit_link > 0.5).astype(int)

    data = pd.DataFrame({"exog": exog, "groups": groups, "endog": endog})

    fit_direct = ConditionalLogit(endog, times_two(exog), groups=groups).fit()
    fit_formula = ConditionalLogit.from_formula(
        "endog ~ 0 + times_two(exog)", groups="groups", data=data
    ).fit()

    assert_allclose(fit_direct.params, fit_formula.params)
    assert_allclose(fit_direct.bse, fit_formula.bse)


def test_logit_2d():

y = np.r_[0, 1, 0, 1, 0, 1, 0, 1, 1, 1]
Expand Down
2 changes: 2 additions & 0 deletions statsmodels/duration/hazard_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from statsmodels.base import model
import statsmodels.base.model as base
from statsmodels.formula.formulatools import advance_eval_env
from statsmodels.tools.decorators import cache_readonly
from statsmodels.compat.pandas import Appender

Expand Down Expand Up @@ -424,6 +425,7 @@ def from_formula(cls, formula, data, status=None, entry=None,
import warnings
warnings.warn("PHReg formulas should not include any '0' or '1' terms")

advance_eval_env(kwargs)
mod = super(PHReg, cls).from_formula(formula, data,
status=status, entry=entry, strata=strata,
offset=offset, subset=subset, ties=ties,
Expand Down
19 changes: 19 additions & 0 deletions statsmodels/duration/tests/test_phreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,25 @@ def test_formula(self):
assert_allclose(rslt1.bse, rslt2.bse)
assert_allclose(rslt1.bse, rslt3.bse)

def test_formula_environment(self):
    """PHReg.from_formula should evaluate names from the caller's namespace."""

    def times_two(x):
        return 2 * x

    rng = np.random.default_rng(0)
    exog = rng.uniform(size=100)
    # Exponential survival times whose rate depends on the regressor.
    endog = np.exp(exog) * -np.log(rng.uniform(size=len(exog)))
    data = pd.DataFrame({"endog": endog, "exog": exog})

    fit_direct = PHReg(endog, times_two(exog)).fit()
    fit_formula = PHReg.from_formula(
        "endog ~ times_two(exog)", data=data
    ).fit()

    assert_allclose(fit_direct.params, fit_formula.params)
    assert_allclose(fit_direct.bse, fit_formula.bse)

def test_formula_cat_interactions(self):

time = np.r_[1, 2, 3, 4, 5, 6, 7, 8, 9]
Expand Down
24 changes: 24 additions & 0 deletions statsmodels/formula/formulatools.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,27 @@
exog_names = model_results.model.exog_names
LC = linear_constraint(test_formula, exog_names)
return LC


def advance_eval_env(kwargs):
    """
    Adjust the ``eval_env`` keyword for one extra level of stack depth.

    Patsy's ``eval_env`` counts stack frames between the formula and the
    namespace it is evaluated in; every ``from_formula`` wrapper on the
    call path adds one frame.  The adjustment is made in place.

    Parameters
    ----------
    kwargs : dict
        The dictionary of keyword arguments passed to ``from_formula``.
        Its ``eval_env`` entry is created or updated in place.

    Returns
    -------
    dict
        The same ``kwargs`` dictionary, for convenience.
    """
    env = kwargs.get("eval_env")
    if env is None:
        # Default: look two frames up, at the caller of the wrapper.
        kwargs["eval_env"] = 2
    elif env == -1:
        # -1 requests a completely empty evaluation environment.
        from patsy import EvalEnvironment
        kwargs["eval_env"] = EvalEnvironment({})
    elif isinstance(env, int):
        # Explicit depth: bump by one for the wrapper frame being added.
        kwargs["eval_env"] = env + 1
    return kwargs
2 changes: 2 additions & 0 deletions statsmodels/genmod/generalized_estimating_equations.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
# used for wrapper:
import statsmodels.regression.linear_model as lm
import statsmodels.base.wrapper as wrap
from statsmodels.formula.formulatools import advance_eval_env

from statsmodels.genmod import families
from statsmodels.genmod.generalized_linear_model import GLM, GLMResults
Expand Down Expand Up @@ -756,6 +757,7 @@ def from_formula(cls, formula, groups, data, subset=None,
family = kwargs["family"]
del kwargs["family"]

advance_eval_env(kwargs)
model = super(GEE, cls).from_formula(formula, data=data, subset=subset,
groups=groups, time=time,
offset=offset,
Expand Down
2 changes: 2 additions & 0 deletions statsmodels/genmod/qif.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import statsmodels.regression.linear_model as lm
import statsmodels.base.wrapper as wrap
from statsmodels.tools.decorators import cache_readonly
from statsmodels.formula.formulatools import advance_eval_env


class QIFCovariance:
Expand Down Expand Up @@ -330,6 +331,7 @@ def from_formula(cls, formula, groups, data, subset=None,
if isinstance(groups, str):
groups = data[groups]

advance_eval_env(kwargs)
model = super(QIF, cls).from_formula(
formula, data=data, subset=subset,
groups=groups, *args, **kwargs)
Expand Down
40 changes: 40 additions & 0 deletions statsmodels/genmod/tests/test_gee.py
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,46 @@ def test_formulas(self):

check_wrapper(rslt2)

def test_formula_environment(self):
    """GEE.from_formula should resolve names from the caller's scope."""

    n = 100
    rng = np.random.default_rng(34234)
    X1 = rng.normal(size=n)
    Y = X1 + rng.normal(size=n)
    Time = rng.uniform(size=n)
    groups = np.kron(np.arange(20), np.ones(5))

    data = pd.DataFrame({"Y": Y, "X1": X1, "Time": Time, "groups": groups})

    va = cov_struct.Autoregressive(grid=False)
    family = families.Gaussian()

    def times_two(x):
        return 2 * x

    # Explicit design matrix: intercept column plus the transformed regressor.
    mat = np.concatenate((np.ones((n, 1)), times_two(X1[:, None])), axis=1)
    result_direct = gee.GEE(
        Y, mat, groups, time=Time, family=family, cov_struct=va
    ).fit()
    assert result_direct is not None

    result_formula = gee.GEE.from_formula(
        "Y ~ times_two(X1)",
        groups,
        data,
        time=Time,
        family=family,
        cov_struct=va,
    ).fit()
    assert result_formula is not None

    assert_almost_equal(
        result_direct.params, result_formula.params, decimal=8
    )

def test_compare_logit(self):

vs = cov_struct.Independence()
Expand Down
34 changes: 34 additions & 0 deletions statsmodels/genmod/tests/test_qif.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,37 @@ def test_formula(cov_struct):
if not isinstance(cov_struct, QIFIndependence):
_ = result2.bic
_ = result2.aic


def test_formula_environment():
    """QIF.from_formula should evaluate names from the caller's namespace."""

    rng = np.random.default_rng(3423)
    x1 = rng.normal(size=100)
    y = x1 + rng.normal(size=100)
    groups = np.kron(np.arange(25), np.ones(4))

    def times_two(x):
        return 2 * x

    cov_struct = QIFIndependence()

    fit_direct = QIF(
        y,
        times_two(x1).reshape(-1, 1),
        groups=groups,
        cov_struct=cov_struct,
    ).fit()

    df = pd.DataFrame({"y": y, "x1": x1, "groups": groups})
    fit_formula = QIF.from_formula(
        "y ~ 0 + times_two(x1)",
        groups="groups",
        cov_struct=cov_struct,
        data=df,
    ).fit()

    assert_allclose(fit_direct.params, fit_formula.params)
    assert_allclose(fit_direct.bse, fit_formula.bse)