Skip to content

Commit

Permalink
ENH/REF: norms in resistant models, smaller ENH in older code
Browse files Browse the repository at this point in the history
  • Loading branch information
josef-pkt committed May 2, 2024
1 parent a3e739a commit 41b4bb2
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 19 deletions.
45 changes: 42 additions & 3 deletions statsmodels/robust/norms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np

# TODO: add plots to weighting functions for online docs.
from . import tools as rtools


def _cabs(x):
Expand Down Expand Up @@ -41,6 +41,9 @@ class RobustNorm:

continuous = 1

def __repr__(self):
return self.__class__.__name__

def rho(self, z):
"""
The robust criterion estimator function.
Expand Down Expand Up @@ -862,12 +865,48 @@ class TukeyBiweight(RobustNorm):
def __init__(self, c=4.685):
    # Tuning constant of the biweight rho function. The default 4.685
    # corresponds to roughly 95% asymptotic efficiency (compare the
    # 0.95 entry of tools.tukeybiweight_eff, c=4.685065).
    self.c = c

def _set_tuning_param(self, c):
def __repr__(self):
return f"{self.__class__.__name__}(c={self.c})"

@classmethod
def get_tuning(cls, bp=None, eff=None):
    """Tuning parameter for given breakdown point or efficiency.

    This currently only returns values from a precomputed table
    (``tools.tukeybiweight_bp`` and ``tools.tukeybiweight_eff``).

    Parameters
    ----------
    bp : float in [0.05, 0.5] or None
        Required breakdown point.
        Either bp or eff has to be specified, but not both.
    eff : float or None
        Required asymptotic efficiency.
        Either bp or eff has to be specified, but not both.

    Returns
    -------
    tuple
        Table row for the requested value: the tuning parameter followed
        by related quantities (efficiency and scale bias for ``bp``,
        breakdown point for ``eff``).

    Raises
    ------
    ValueError
        If both or neither of bp and eff are specified, or if the
        requested value is not one of the tabulated keys.
    """
    if ((bp is None and eff is None) or
            (bp is not None and eff is not None)):
        raise ValueError("exactly one of bp and eff needs to be provided")

    if bp is not None:
        table, key, kind = rtools.tukeybiweight_bp, bp, "bp"
    else:
        table, key, kind = rtools.tukeybiweight_eff, eff, "eff"
    try:
        return table[key]
    except KeyError:
        # Only tabulated keys are supported; raise an informative error
        # instead of surfacing a bare KeyError to the caller.
        raise ValueError(
            f"{kind}={key} is not available, choose one of {sorted(table)}"
        ) from None

def _set_tuning_param(self, c, inplace=False):
"""Set and change the tuning parameter of the Norm.
Warning: this needs to wipe cached attributes that depend on the param.
"""
self.c = c
if inplace:
self.c = c
return self

Check warning on line 907 in statsmodels/robust/norms.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/norms.py#L906-L907

Added lines #L906 - L907 were not covered by tests
else:
return self.__class__(c=c)

def max_rho(self):
    """Return the largest value of rho, evaluated at the tuning constant c."""
    cutoff = self.c
    return self.rho(cutoff)
Expand Down
138 changes: 122 additions & 16 deletions statsmodels/robust/resistant_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,53 @@

class RLMDetS(Model):

Check warning on line 19 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L19

Added line #L19 was not covered by tests
"""S-estimator for linear model with deterministic starts.
Notes
-----
This estimator combines the method of Fast-S regression (Salibian-Barrera
et al 2006) using starting sets similar to the deterministic estimation of
multivariate location and scatter DetS and DetMM of Hubert et al (2012).
References
----------
.. [1] Hubert, Mia, Peter J. Rousseeuw, and Tim Verdonck. 2012. “A
Deterministic Algorithm for Robust Location and Scatter.” Journal of
Computational and Graphical Statistics 21 (3): 618–37.
https://doi.org/10.1080/10618600.2012.672100.
.. [2] Hubert, Mia, Peter Rousseeuw, Dina Vanpaemel, and Tim Verdonck.
2015. “The DetS and DetMM Estimators for Multivariate Location and
Scatter.” Computational Statistics & Data Analysis 81 (January): 64–75.
https://doi.org/10.1016/j.csda.2014.07.013.
.. [3] Rousseeuw, Peter J., Stefan Van Aelst, Katrien Van Driessen, and
Jose Agulló. 2004. “Robust Multivariate Regression.”
Technometrics 46 (3): 293–305.
.. [4] Salibian-Barrera, Matías, and Víctor J. Yohai. 2006. “A Fast
Algorithm for S-Regression Estimates.” Journal of Computational and
Graphical Statistics 15 (2): 414–27.
"""

def __init__(self, endog, exog, norm=None, breakdown_point=0.5,

Check warning on line 54 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L54

Added line #L54 was not covered by tests
col_indices=None, include_endog=False):
super().__init__(endog, exog)

Check warning on line 56 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L56

Added line #L56 was not covered by tests

if norm is None:
norm = rnorms.TukeyBiweight(c=1.547)
scale_bias = 0.1995
self.mscale = rscale.MScale(norm, scale_bias)
else:
raise ValueError()
norm = rnorms.TukeyBiweight()

Check warning on line 59 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L59

Added line #L59 was not covered by tests

tune = norm.get_tuning(bp=breakdown_point)
c = tune[0]
scale_bias = tune[2]
norm = norm._set_tuning_param(c, inplace=False)
self.mscale = rscale.MScale(norm, scale_bias)

Check warning on line 65 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L61-L65

Added lines #L61 - L65 were not covered by tests

self.norm = norm
# need tuning params for given breakdown point
# if type(norm) is a class then create an instance
# if norm is an instance, then just use it
# self.norm._set_tuning_param(???)
# data for robust mahalanobis distance of starting sets
self.breakdown_point = breakdown_point

Check warning on line 68 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L67-L68

Added lines #L67 - L68 were not covered by tests

# TODO: detect constant
Expand All @@ -45,6 +73,7 @@ def __init__(self, endog, exog, norm=None, breakdown_point=0.5,
else:
exog_start = self.exog[:, col_indices]

Check warning on line 74 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L74

Added line #L74 was not covered by tests

# data for robust mahalanobis distance of starting sets
if include_endog:
self.data_start = np.column_stack((endog, exog_start))

Check warning on line 78 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L78

Added line #L78 was not covered by tests
else:
Expand Down Expand Up @@ -104,12 +133,90 @@ def fit(self, h, maxiter=100, maxiter_step=5, start_params_extra=None):


class RLMDetSMM(RLMDetS):

Check warning on line 135 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L135

Added line #L135 was not covered by tests
"""MM-estimator with S-estimator starting values
"""MM-estimator with S-estimator starting values.
Parameters
----------
endog : array-like, 1-dim
Dependent, endogenous variable.
exog : array-like, 2-dim
    Independent, exogenous regressor variables.
norm : robust norm
Redescending robust norm used for S- and MM-estimation.
Default is TukeyBiweight.
efficiency : float in (0, 1)
Asymptotic efficiency of the MM-estimator (used in second stage).
breakdown_point : float in (0, 0.5)
Breakdown point of the preliminary S-estimator.
col_indices : None or array-like of ints
Index of columns of exog to use in the mahalanobis distance computation
for the starting sets of the S-estimator.
Default is all exog except first column (constant). Todo: will change
when we autodetect the constant column
include_endog : bool
If true, then the endog variable is combined with the exog variables
to compute the mahalanobis distances for the starting sets of the
S-estimator.
"""
def __init__(self, endog, exog, norm=None, efficiency=0.95,
             breakdown_point=0.5, col_indices=None, include_endog=False):
    # First stage: the parent sets up the S-estimator, tuning (a copy of)
    # the norm for the requested breakdown point.
    super().__init__(
        endog,
        exog,
        norm=norm,
        breakdown_point=breakdown_point,
        col_indices=col_indices,
        include_endog=include_endog
        )

    self.efficiency = efficiency
    if norm is None:
        norm = rnorms.TukeyBiweight()

    # Second stage: retune a copy of the norm for the requested asymptotic
    # efficiency; this norm is used in the M-estimation of the mean
    # parameters (see ``fit``). ``inplace=False`` leaves the caller's norm
    # instance unchanged.
    c = norm.get_tuning(eff=efficiency)[0]
    norm = norm._set_tuning_param(c, inplace=False)
    self.norm_mean = norm

def fit(self, h=None, scale_binding=False, start=None):

Check warning on line 181 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L181

Added line #L181 was not covered by tests
"""Estimate the model
Parameters
----------
h : int
The size of the initial sets for the S-estimator.
Default is .... (todo)
scale_binding : bool
If true, then the scale is fixed in the second stage M-estimation,
i.e. this is the MM-estimator.
If false, then the high breakdown point M-scale is used also in the
second stage M-estimation if that estimated scale is smaller than
the scale of the preliminary, first stage S-estimator.
start : tuple or None
If None, then the starting parameters and scale for the second
stage M-estimation are taken from the first stage S-estimator.
Alternatively, the starting parameters and starting scale can be
provided by the user as tuple (start_params, start_scale). In this
case the first stage S-estimation is skipped.
maxiter, other optimization parameters are still missing (todo)
Returns
-------
results instance
Notes
-----
If scale_binding is true, then the estimator is a standard
MM-estimator with fixed scale in the second stage M-estimation.
If scale_binding is false, then the estimator will try to find an
estimate with lower M-scale using the same scale-norm rho as in the
first stage S-estimator. If the estimated scale is not smaller than
the scale estimated in the first stage S-estimator, then the
fixed scale MM-estimator is returned.
"""
norm_m = self.norm_mean

Check warning on line 219 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L219

Added line #L219 was not covered by tests
if start is None:
res_s = super().fit(h)
start_params = np.asarray(res_s.params)
Expand All @@ -118,23 +225,22 @@ def fit(self, h=None, binding=False, start=None):
start_params, start_scale = start
res_s = None

Check warning on line 226 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L225-L226

Added lines #L225 - L226 were not covered by tests

# mod_m = RLM(res_s.model.endog, res_s.model.exog, M=norm_m)
mod_m = RLM(self.endog, self.exog, M=norm_m)
res_mm = mod_m.fit(

Check warning on line 229 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L228-L229

Added lines #L228 - L229 were not covered by tests
start_params=start_params,
start_scale=start_scale,
update_scale=False
)

if not binding:
if not scale_binding:
# we can compute this first and skip MM if scale decrease
mod_sm = RLM(self.endog, self.exog, M=norm_m)
res_sm = mod_sm.fit(

Check warning on line 238 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L237-L238

Added lines #L237 - L238 were not covered by tests
start_params=start_params,
scale_est=self.mscale
)

if not binding and res_sm.scale < res_mm.scale:
if not scale_binding and res_sm.scale < res_mm.scale:
res = res_sm

Check warning on line 244 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L244

Added line #L244 was not covered by tests
else:
res = res_mm

Check warning on line 246 in statsmodels/robust/resistant_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/resistant_linear_model.py#L246

Added line #L246 was not covered by tests
Expand Down
2 changes: 2 additions & 0 deletions statsmodels/robust/robust_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ def fit(self, maxiter=50, tol=1e-8, scale_est='mad', init=None, cov='H1',
self.scale = self._estimate_scale(wls_results.resid)
elif start_scale:
self.scale = start_scale
if not update_scale:
self.scale_est = scale_est = "fixed"

Check warning on line 281 in statsmodels/robust/robust_linear_model.py

View check run for this annotation

Codecov / codecov/patch

statsmodels/robust/robust_linear_model.py#L281

Added line #L281 was not covered by tests

history = dict(params=[np.inf], scale=[])
if conv == 'coefs':
Expand Down
3 changes: 3 additions & 0 deletions statsmodels/robust/scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,9 @@ def __init__(self, chi_func, scale_bias):
self.chi_func = chi_func
self.scale_bias = scale_bias

def __repr__(self):
return repr(self.chi_func)

def __call__(self, data, **kwds):
    """Make the instance callable as a shorthand for ``fit``."""
    fit_kwds = kwds
    return self.fit(data, **fit_kwds)

Expand Down
30 changes: 30 additions & 0 deletions statsmodels/robust/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,33 @@ def func(c):
)

return res2

# ##### tables

# Precomputed tuning-parameter tables for the TukeyBiweight norm,
# consumed by ``TukeyBiweight.get_tuning`` in ``robust.norms``.
# Only these tabulated keys are supported.

tukeybiweight_bp = {
    # breakdown point : (tuning parameter, efficiency, scale bias)
    0.50: (1.547645, 0.286826, 0.199600),
    0.45: (1.756059, 0.369761, 0.231281),
    0.40: (1.987965, 0.461886, 0.263467),
    0.35: (2.251831, 0.560447, 0.295793),
    0.30: (2.560843, 0.661350, 0.327896),
    0.25: (2.937015, 0.759040, 0.359419),
    0.20: (3.420681, 0.846734, 0.390035),
    0.15: (4.096255, 0.917435, 0.419483),
    0.10: (5.182361, 0.966162, 0.447614),
    0.05: (7.545252, 0.992424, 0.474424),
}

tukeybiweight_eff = {
    # efficiency : (tuning parameter, breakdown point)
    0.65: (2.523102, 0.305646),
    0.70: (2.697221, 0.280593),
    0.75: (2.897166, 0.254790),
    0.80: (3.136909, 0.227597),
    0.85: (3.443690, 0.197957),
    0.90: (3.882662, 0.163779),
    0.95: (4.685065, 0.119414),
    0.97: (5.596823, 0.087088),
    0.98: (5.920719, 0.078604),
    0.99: (7.041392, 0.056969),
}

0 comments on commit 41b4bb2

Please sign in to comment.