Merge pull request #8780 from josef-pkt/ref_getdistribution
REF: get_distribution, return 1-d instead of column frozen distribution
josef-pkt committed Apr 13, 2023
2 parents dda8e3d + 992d72c commit 4afdb22
Showing 10 changed files with 128 additions and 18 deletions.
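The gist of the refactor, sketched below with made-up numbers (not taken from the diff): the frozen distributions used to be built from column-shaped (nobs, 1) parameter arrays, so elementwise methods returned 2-d columns; they are now built from 1-d arrays, and broadcasting against a grid of count values is done explicitly where it is needed.

import numpy as np
from scipy import stats

mu = np.array([0.5, 1.5, 3.0])              # 1-d predicted means, one per observation

distr_col = stats.poisson(mu[:, None])      # old style: column parameters
distr_1d = stats.poisson(mu)                # new style: 1-d parameters

print(distr_col.pmf(1).shape)               # (3, 1)
print(distr_1d.pmf(1).shape)                # (3,)

# broadcasting against a grid of counts is now done explicitly at the call site
y_values = np.arange(5)
probs = distr_1d.pmf(y_values[:, None]).T   # (nobs, n_counts) = (3, 5)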
9 changes: 6 additions & 3 deletions statsmodels/discrete/count_model.py
Expand Up @@ -727,7 +727,8 @@ def get_distribution(self, params, exog=None, exog_infl=None,
w = self.predict(params, exog=exog, exog_infl=exog_infl,
exposure=exposure, offset=offset, which="prob-main")

distr = self.distribution(mu[:, None], 1 - w[:, None])
# distr = self.distribution(mu[:, None], 1 - w[:, None])
distr = self.distribution(mu, 1 - w)
return distr


Expand Down Expand Up @@ -842,7 +843,8 @@ def get_distribution(self, params, exog=None, exog_infl=None,
exposure=exposure, offset=offset, which="mean-main")
w = self.predict(params, exog=exog, exog_infl=exog_infl,
exposure=exposure, offset=offset, which="prob-main")
distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
# distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
distr = self.distribution(mu, params[-1], p, 1 - w)
return distr


Expand Down Expand Up @@ -958,7 +960,8 @@ def get_distribution(self, params, exog=None, exog_infl=None,
w = self.predict(params, exog=exog, exog_infl=exog_infl,
exposure=exposure, offset=offset, which="prob-main")

distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
# distr = self.distribution(mu[:, None], params[-1], p, 1 - w[:, None])
distr = self.distribution(mu, params[-1], p, 1 - w)
return distr


26 changes: 18 additions & 8 deletions statsmodels/discrete/discrete_model.py
Expand Up @@ -677,7 +677,8 @@ def get_distribution(self, params, exog=None, offset=None):
Instance of frozen scipy distribution.
"""
mu = self.predict(params, exog=exog, offset=offset)
distr = stats.bernoulli(mu[:, None])
# distr = stats.bernoulli(mu[:, None])
distr = stats.bernoulli(mu)
return distr


Expand Down Expand Up @@ -1659,7 +1660,7 @@ def predict(self, params, exog=None, exposure=None, offset=None,
exposure=exposure, offset=offset,
)[:, None]
# uses broadcasting
return stats.poisson.pmf(y_values, mu)
return stats.poisson._pmf(y_values, mu)
else:
raise ValueError('Value of the `which` option is not recognized')

Expand Down Expand Up @@ -2281,7 +2282,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
"""
mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
p = self.parameterization + 1
distr = genpoisson_p(mu[:, None], params[-1], p)
# distr = genpoisson_p(mu[:, None], params[-1], p)
distr = genpoisson_p(mu, params[-1], p)
return distr


Expand Down Expand Up @@ -3587,8 +3589,13 @@ def predict(self, params, exog=None, exposure=None, offset=None,
offset=offset
)
if y_values is None:
y_values = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
return distr.pmf(y_values)
y_values = np.arange(0, np.max(self.endog) + 1)
else:
y_values = np.asarray(y_values)

assert y_values.ndim == 1
y_values = y_values[..., None]
return distr.pmf(y_values).T

exog, offset, exposure = self._get_predict_arrays(
exog=exog,
Expand Down Expand Up @@ -3757,7 +3764,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
"""
mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
if self.loglike_method == 'geometric':
distr = stats.geom(1 / (1 + mu[:, None]), loc=-1)
# distr = stats.geom(1 / (1 + mu[:, None]), loc=-1)
distr = stats.geom(1 / (1 + mu), loc=-1)
else:
if self.loglike_method == 'nb2':
p = 2
Expand All @@ -3768,7 +3776,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
q = 2 - p
size = 1. / alpha * mu**q
prob = size / (size + mu)
distr = nbinom(size[:, None], prob[:, None])
# distr = nbinom(size[:, None], prob[:, None])
distr = nbinom(size, prob)

return distr

Expand Down Expand Up @@ -4343,7 +4352,8 @@ def get_distribution(self, params, exog=None, exposure=None, offset=None):
"""
mu = self.predict(params, exog=exog, exposure=exposure, offset=offset)
size, prob = self.convert_params(params, mu)
distr = nbinom(size[:, None], prob[:, None])
# distr = nbinom(size[:, None], prob[:, None])
distr = nbinom(size, prob)
return distr


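In the get_distribution methods above, only the [:, None] reshapes are dropped; the size/prob conversion itself is unchanged. A rough check of the NB2 case with made-up mu and alpha, showing that the 1-d frozen nbinom still reproduces the usual mean and variance:

import numpy as np
from scipy.stats import nbinom

mu = np.array([1.0, 2.5, 4.0])     # illustrative 1-d predicted means
alpha = 0.5                        # illustrative dispersion parameter
size = 1.0 / alpha                 # NB2: q = 2 - p = 0, so size = 1 / alpha
prob = size / (size + mu)

distr = nbinom(size, prob)         # 1-d frozen distribution, one entry per observation
np.testing.assert_allclose(distr.mean(), mu)                    # E[y] = mu
np.testing.assert_allclose(distr.var(), mu + alpha * mu ** 2)   # NB2 variance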
6 changes: 3 additions & 3 deletions statsmodels/discrete/tests/test_conditional.py
Expand Up @@ -20,12 +20,12 @@ def test_logit_1d():
for x in -1, 0, 1, 2:
params = np.r_[x, ]
_, grad = model._denom_grad(0, params)
ngrad = approx_fprime(params, lambda x: model._denom(0, x))
ngrad = approx_fprime(params, lambda x: model._denom(0, x)).squeeze()
assert_allclose(grad, ngrad)

# Check the gradient for the loglikelihood
for x in -1, 0, 1, 2:
grad = approx_fprime(np.r_[x, ], model.loglike)
grad = approx_fprime(np.r_[x, ], model.loglike).squeeze()
score = model.score(np.r_[x, ])
assert_allclose(grad, score, rtol=1e-4)

Expand Down Expand Up @@ -117,7 +117,7 @@ def test_poisson_1d():

# Check the gradient for the loglikelihood
for x in -1, 0, 1, 2:
grad = approx_fprime(np.r_[x, ], model.loglike)
grad = approx_fprime(np.r_[x, ], model.loglike).squeeze()
score = model.score(np.r_[x, ])
assert_allclose(grad, score, rtol=1e-4)

8 changes: 8 additions & 0 deletions statsmodels/discrete/tests/test_predict.py
Expand Up @@ -359,6 +359,7 @@ def test_distr(case):
# res = mod.fit()
params_dgp = params
distr = mod.get_distribution(params_dgp)
assert distr.pmf(1).ndim == 1
try:
y2 = distr.rvs(size=(nobs, 1)).squeeze()
except ValueError:
Expand All @@ -378,7 +379,14 @@ def test_distr(case):
assert_allclose(res.resid_pearson, (y2 - mean) / np.sqrt(var_), rtol=1e-13)

if not issubclass(cls_model, BinaryModel):
# smoke, shape, consistency test
probs = res.predict(which="prob", y_values=np.arange(5))
assert probs.shape == (len(mod.endog), 5)
probs2 = res.get_prediction(
which="prob", y_values=np.arange(5), average=True)
assert_allclose(probs2.predicted, probs.mean(0), rtol=1e-10)
dia = res.get_diagnostic()
dia.probs_predicted
# fig = dia.plot_probs();
# fig.suptitle(cls_model.__name__ + repr(kwds), fontsize=16)

7 changes: 4 additions & 3 deletions statsmodels/discrete/truncated_model.py
Expand Up @@ -383,12 +383,13 @@ def predict(self, params, exog=None, exposure=None, offset=None,
return probs
elif which == 'prob-base':
if y_values is not None:
counts = np.atleast_2d(y_values)
counts = np.asarray(y_values)
else:
counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
counts = np.arange(0, np.max(self.endog)+1)

probs = self.model_main.predict(
params, exog=exog, exposure=np.exp(exposure),
offset=offset, which="prob", y_values=counts)[:, None]
offset=offset, which="prob", y_values=counts)
return probs
elif which == 'var':
mu = np.exp(linpred)
62 changes: 62 additions & 0 deletions statsmodels/genmod/generalized_linear_model.py
Expand Up @@ -2149,6 +2149,68 @@ def get_influence(self, observed=True):
hat_matrix_diag=hat_matrix_diag)
return infl

def get_distribution(self, exog=None, exposure=None,
offset=None, var_weights=1., n_trials=1.):
"""
Return an instance of the predictive distribution.
Parameters
----------
exog : array_like
The predictor variable matrix.
offset : array_like or None
Offset variable for predicted mean.
exposure : array_like or None
Log(exposure) will be added to the linear prediction.
var_weights : array_like
1d array of variance (analytic) weights. The default is 1.
n_trials : int
Number of trials for the binomial distribution. The default is 1
which corresponds to a Bernoulli random variable.
Returns
-------
gen
Instance of a scipy frozen distribution based on estimated
parameters.
Use the ``rvs`` method to generate random values.
Notes
-----
Due to the behavior of ``scipy.stats.distributions`` objects, the
returned random number generator must be called with ``gen.rvs(n)``
where ``n`` is the number of observations in the data set used
to fit the model. If any other value is used for ``n``, misleading
results will be produced.
"""
# Note this is mostly a copy of GLM.get_prediction
# calling results.predict here avoids the exog check and transform

if isinstance(self.model.family, (families.Binomial, families.Poisson,
families.NegativeBinomial)):
# use scale=1, independent of QMLE scale for discrete
scale = 1.
if self.scale != 1.:
msg = "using scale=1, no exess dispersion in distribution"
warnings.warn(msg, UserWarning)
else:
scale = self.scale

mu = self.predict(exog, exposure, offset, which="mean")

kwds = {}
if (np.any(n_trials != 1) and
isinstance(self.model.family, families.Binomial)):

kwds["n_trials"] = n_trials

distr = self.model.family.get_distribution(
mu, scale, var_weights=var_weights, **kwds)
return distr


@Appender(base.LikelihoodModelResults.remove_data.__doc__)
def remove_data(self):
# GLM has alias/reference in result instance
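A sketch of how the new results-level get_distribution might be used; the data and model below are synthetic and only illustrate the call pattern:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(12345)
x = sm.add_constant(rng.normal(size=(200, 2)))
y = rng.poisson(np.exp(x @ np.r_[0.5, 0.2, -0.1]))

res = sm.GLM(y, x, family=sm.families.Poisson()).fit()
distr = res.get_distribution()   # frozen scipy distribution at the estimation sample
y_sim = distr.rvs()              # one simulated outcome per observation, shape (200,)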
10 changes: 10 additions & 0 deletions statsmodels/genmod/tests/test_glm.py
Expand Up @@ -255,6 +255,15 @@ def test_get_distribution(self):
var_ = res1.predict(which="var_unscaled")
assert_allclose(var_ * res_scale, var_endog, rtol=1e-13)

# check get_distribution of results instance
if getattr(self, "has_edispersion", False):
with pytest.warns(UserWarning, match="using scale=1"):
distr3 = res1.get_distribution()
else:
distr3 = res1.get_distribution()
for k in distr2.kwds:
assert_allclose(distr3.kwds[k], distr2.kwds[k], rtol=1e-13)


class CheckComparisonMixin:

Expand Down Expand Up @@ -865,6 +874,7 @@ def setup_class(cls):
res2 = Committee()
res2.aic_R += 2 # They do not count a degree of freedom for the scale
cls.res2 = res2
cls.has_edispersion = True

# FIXME: enable or delete
# def setup_method(self):
2 changes: 2 additions & 0 deletions statsmodels/othermod/betareg.py
Expand Up @@ -134,6 +134,8 @@ def __init__(self, endog, exog, exog_precision=None,
self._init_keys.extend(['exog_precision'])
self._init_keys.extend(['link', 'link_precision'])
self._null_drop_keys = ['exog_precision']
del kwds['extra_params_names']
self._check_kwargs(kwds)
self.results_class = BetaResults
self.results_class_wrapper = BetaResultsWrapper

11 changes: 11 additions & 0 deletions statsmodels/othermod/tests/test_beta.py
Expand Up @@ -2,11 +2,16 @@
import io
import os

import pytest

import numpy as np
from numpy.testing import assert_allclose, assert_equal
import pandas as pd
import patsy
from statsmodels.api import families
from statsmodels.tools.sm_exceptions import (
ValueWarning,
)
from statsmodels.othermod.betareg import BetaModel
from .results import results_betareg as resultsb

Expand Down Expand Up @@ -126,6 +131,12 @@ def test_precision_formula(self):
assert_close(rslt.params, self.meth_fit.params, 1e-10)
assert isinstance(rslt.params, pd.Series)

with pytest.warns(ValueWarning, match="unknown kwargs"):
BetaModel.from_formula(self.model, methylation,
exog_precision_formula='~ age',
link_precision=links.Identity(),
junk=False)

def test_scores(self):
model, Z = self.model, self.Z
for link in (links.Identity(), links.Log()):
5 changes: 4 additions & 1 deletion statsmodels/tools/numdiff.py
Expand Up @@ -158,7 +158,10 @@ def approx_fprime(x, f, epsilon=None, args=(), kwargs={}, centered=False):
f(*((x-ei,)+args), **kwargs))/(2 * epsilon[k])
ei[k] = 0.0

return grad.squeeze().T
if n == 1:
return grad.T
else:
return grad.squeeze().T


def _approx_fprime_scalar(x, f, epsilon=None, args=(), kwargs={},
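With the n == 1 branch, the numerical gradient of a function of a single parameter keeps its trailing parameter axis instead of being squeezed away, which is why the conditional-model tests above add .squeeze(). A small sketch with a made-up vector-valued test function:

import numpy as np
from statsmodels.tools.numdiff import approx_fprime

def f(b):
    # vector-valued function of a single parameter
    return np.array([b[0] ** 2, 3.0 * b[0]])

g = approx_fprime(np.array([2.0]), f)
print(g.shape)       # (2, 1): parameter axis kept when n == 1
print(g.squeeze())   # approximately [4., 3.]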
