Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugs about fit SLIMClassifier, BayesianRuleSetClassifier, SlipperClassifier, TaoTreeRegressor #186

Open
jckkvs opened this issue Jul 31, 2023 · 0 comments

Comments

@jckkvs
Copy link
Contributor

jckkvs commented Jul 31, 2023

I tested the fit functions of almost all classifiers and regressors.
Based on the test results, the following four models failed to fit: SLIMClassifier, BayesianRuleSetClassifier, SlipperClassifier, and TaoTreeRegressor.

import pytest
from imodels import *
from sklearn.datasets import make_regression, make_classification


# Classifiers under test: every imodels classifier expected to accept the
# raw (continuous) X produced by make_classification in fit(X, y).
classifiers = [
    SLIMClassifier(),
    OptimalRuleListClassifier(),
    GreedyRuleListClassifier(),
    OneRClassifier(),
    BoostedRulesClassifier(),
    BayesianRuleSetClassifier(),
    RuleFitClassifier(),
    SkopeRulesClassifier(),
    SlipperClassifier(),
    C45TreeClassifier(),
    GreedyTreeClassifier(),
    FIGSClassifier(),
    FIGSClassifierCV(),
    HSTreeClassifier(),
    HSTreeClassifierCV(),
    TaoTreeClassifier(),
]

# Not tested here: per the list name, these classifiers presumably require
# discretized/binarized X rather than the continuous features used below.
classifiers_for_discretized_X = [
    BayesianRuleListClassifier(),
    FPLassoClassifier(),
    FPSkopeClassifier(),
]

# Regressors under test: every imodels regressor expected to accept the
# raw (continuous) X produced by make_regression in fit(X, y).
regressors = [
    SLIMRegressor(),
    BoostedRulesRegressor(),
    RuleFitRegressor(),
    GreedyTreeRegressor(),
    FIGSRegressor(),
    FIGSRegressorCV(),
    HSTreeRegressor(),
    HSTreeRegressorCV(),
    TaoTreeRegressor(),
]

# Not tested here: per the list name, this regressor presumably requires
# discretized/binarized X rather than continuous features.
regressors_for_discretized_X = [
    FPLassoRegressor(),
]


@pytest.mark.parametrize("classifier", classifiers)
def test_fit_classifier(classifier) -> None:
    """Smoke test: fitting each classifier on a tiny synthetic dataset must not raise."""
    features, labels = make_classification(n_samples=25, n_features=5)
    classifier.fit(features, labels)


@pytest.mark.parametrize("regressor", regressors)
def test_fit_regressor(regressor) -> None:
    """Smoke test: fitting each regressor on a tiny synthetic dataset must not raise."""
    features, targets = make_regression(n_samples=25, n_features=5)
    regressor.fit(features, targets)

Here are the test results:

_____________________________________________ test_fit_classifier[classifier1] ______________________________________________

classifier = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})

    @pytest.mark.parametrize("classifier", classifiers)
    def test_fit_classifier(classifier) -> None:
        X, y = make_classification(n_samples=25, n_features=5)
>       classifier_ = clone(classifier)

tests\test_fit_print.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

estimator = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})

    def clone(estimator, *, safe=True):
        """Construct a new unfitted estimator with the same parameters.

        Clone does a deep copy of the model in an estimator
        without actually copying attached data. It returns a new estimator
        with the same parameters that has not been fitted on any data.

        Parameters
        ----------
        estimator : {list, tuple, set} of estimator instance or a single \
                estimator instance
            The estimator or group of estimators to be cloned.
        safe : bool, default=True
            If safe is False, clone will fall back to a deep copy on objects
            that are not estimators.

        Returns
        -------
        estimator : object
            The deep copy of the input, an estimator if input is an estimator.

        Notes
        -----
        If the estimator's `random_state` parameter is an integer (or if the
        estimator doesn't have a `random_state` parameter), an *exact clone* is
        returned: the clone and the original estimator will give the exact same
        results. Otherwise, *statistical clone* is returned: the clone might
        return different results from the original estimator. More details can be
        found in :ref:`randomness`.
        """
        estimator_type = type(estimator)
        # XXX: not handling dictionaries
        if estimator_type in (list, tuple, set, frozenset):
            return estimator_type([clone(e, safe=safe) for e in estimator])
        elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
            if not safe:
                return copy.deepcopy(estimator)
            else:
                if isinstance(estimator, type):
                    raise TypeError(
                        "Cannot clone object. "
                        + "You should provide an instance of "
                        + "scikit-learn estimator instead of a class."
                    )
                else:
                    raise TypeError(
                        "Cannot clone object '%s' (type %s): "
                        "it does not seem to be a scikit-learn "
                        "estimator as it does not implement a "
                        "'get_params' method." % (repr(estimator), type(estimator))
                    )

        klass = estimator.__class__
>       new_object_params = estimator.get_params(deep=False)
E       TypeError: CorelsClassifier.get_params() got an unexpected keyword argument 'deep'

..\..\..\Anaconda3\envs\py310\lib\site-packages\sklearn\base.py:87: TypeError
_____________________________________________ test_fit_classifier[classifier5] ______________________________________________

classifier = BayesianRuleSetClassifier()

    @pytest.mark.parametrize("classifier", classifiers)
    def test_fit_classifier(classifier) -> None:
        X, y = make_classification(n_samples=25, n_features=5)
        classifier_ = clone(classifier)
>       classifier_.fit(X, y)

tests\test_fit_print.py:53:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = BayesianRuleSetClassifier(alpha_l=[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
                          beta_l=[-0.0...
                                  25200.0, 21000.0, 12000.0, 4500.0, 1000.0,
                                  100.0])
X =           X0        X1        X2        X3        X4
0   1.856723 -0.676788 -2.081929  0.139416  1.345762
1   0.942105...948  2.517298
23 -0.556286 -2.165002 -0.522723  1.466807 -0.796446
24 -1.369548  1.188797 -0.544919 -0.542191 -0.878127
y = array([1., 1., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1.,
       1., 0., 0., 0., 1., 1., 0., 0.])
feature_names = ['X0', 'X1', 'X2', 'X3', 'X4'], init = [], verbose = False

    def fit(self, X, y, feature_names: list = None, init=[], verbose=False):
        '''
        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data
        y : array_like, shape = [n_samples]
            Labels

        feature_names : array_like, shape = [n_features], optional (default: [])
            String labels for each feature.
            If empty and X is a DataFrame, column labels are used.
            If empty and X is not a DataFrame, then features are simply enumerated
        '''
        # check inputs
        self.attr_level_num = defaultdict(int)  # any missing value defaults to 0
        self.attr_names = []

        X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
        np.random.seed(self.random_state)

        # convert to pandas DataFrame
        X = pd.DataFrame(X, columns=feature_names)

        for i, name in enumerate(X.columns):
            self.attr_level_num[name] += 1
            self.attr_names.append(name)
        self.attr_names_orig = deepcopy(self.attr_names)
        self.attr_names = list(set(self.attr_names))

        # set up patterns
        self._set_pattern_space()

        # parameter checking
        if self.alpha_l is None or self.beta_l is None or len(self.alpha_l) != self.maxlen or len(
                self.beta_l) != self.maxlen:
            if verbose:
                print('No or wrong input for alpha_l and beta_l - the model will use default parameters.')
            self.C = [1.0 / self.maxlen] * self.maxlen
            self.C.insert(0, -1)
            self.alpha_l = [10] * (self.maxlen + 1)
            self.beta_l = [10 * self.pattern_space[i] / self.C[i] for i in range(self.maxlen + 1)]
        else:
            self.alpha_l = [1] + list(self.alpha_l)
            self.beta_l = [1] + list(self.beta_l)

        # setup
        self._generate_rules(X, y, verbose)
        n_rules_current = len(self.rules_)
        self.rules_len_list = [len(rule) for rule in self.rules_]
        maps = defaultdict(list)
        T0 = 1000  # initial temperature for simulated annealing
        split = 0.7 * self.num_iterations

        # run simulated annealing
        for chain in range(self.num_chains):
            # initialize with a random pattern set
            if init != []:
                rules_curr = init.copy()
            else:
>               assert n_rules_current > 1, f'Only {n_rules_current} potential rules found, change hyperparams to allow for more'
E               AssertionError: Only 0 potential rules found, change hyperparams to allow for more

imodels\rule_set\brs.py:147: AssertionError
--------------------------------------------------- Captured stdout call ----------------------------------------------------
mat.shape (25, 13626)




p1.shape (13626,) pp.shape (13626,) cond_entropy.shape
_____________________________________________ test_fit_classifier[classifier12] _____________________________________________

classifier = <imodels.tree.figs.FIGSClassifierCV object at 0x0000024E24343580>

    @pytest.mark.parametrize("classifier", classifiers)
    def test_fit_classifier(classifier) -> None:
        X, y = make_classification(n_samples=25, n_features=5)
>       classifier_ = clone(classifier)

tests\test_fit_print.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

estimator = <imodels.tree.figs.FIGSClassifierCV object at 0x0000024E24343580>

    def clone(estimator, *, safe=True):
        """Construct a new unfitted estimator with the same parameters.

        Clone does a deep copy of the model in an estimator
        without actually copying attached data. It returns a new estimator
        with the same parameters that has not been fitted on any data.

        Parameters
        ----------
        estimator : {list, tuple, set} of estimator instance or a single \
                estimator instance
            The estimator or group of estimators to be cloned.
        safe : bool, default=True
            If safe is False, clone will fall back to a deep copy on objects
            that are not estimators.

        Returns
        -------
        estimator : object
            The deep copy of the input, an estimator if input is an estimator.

        Notes
        -----
        If the estimator's `random_state` parameter is an integer (or if the
        estimator doesn't have a `random_state` parameter), an *exact clone* is
        returned: the clone and the original estimator will give the exact same
        results. Otherwise, *statistical clone* is returned: the clone might
        return different results from the original estimator. More details can be
        found in :ref:`randomness`.
        """
        estimator_type = type(estimator)
        # XXX: not handling dictionaries
        if estimator_type in (list, tuple, set, frozenset):
            return estimator_type([clone(e, safe=safe) for e in estimator])
        elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
            if not safe:
                return copy.deepcopy(estimator)
            else:
                if isinstance(estimator, type):
                    raise TypeError(
                        "Cannot clone object. "
                        + "You should provide an instance of "
                        + "scikit-learn estimator instead of a class."
                    )
                else:
>                   raise TypeError(
                        "Cannot clone object '%s' (type %s): "
                        "it does not seem to be a scikit-learn "
                        "estimator as it does not implement a "
                        "'get_params' method." % (repr(estimator), type(estimator))
                    )
E                   TypeError: Cannot clone object '<imodels.tree.figs.FIGSClassifierCV object at 0x0000024E24343580>' (type <class 'imodels.tree.figs.FIGSClassifierCV'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.

..\..\..\Anaconda3\envs\py310\lib\site-packages\sklearn\base.py:79: TypeError
______________________________________________ test_fit_regressor[regressor5] _______________________________________________

regressor = <imodels.tree.figs.FIGSRegressorCV object at 0x0000024E24343A00>

    @pytest.mark.parametrize("regressor", regressors)
    def test_fit_regressor(regressor) -> None:
        X, y = make_regression(n_samples=25, n_features=5)
>       regressor_ = clone(regressor)

tests\test_fit_print.py:59:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

estimator = <imodels.tree.figs.FIGSRegressorCV object at 0x0000024E24343A00>

    def clone(estimator, *, safe=True):
        """Construct a new unfitted estimator with the same parameters.

        Clone does a deep copy of the model in an estimator
        without actually copying attached data. It returns a new estimator
        with the same parameters that has not been fitted on any data.

        Parameters
        ----------
        estimator : {list, tuple, set} of estimator instance or a single \
                estimator instance
            The estimator or group of estimators to be cloned.
        safe : bool, default=True
            If safe is False, clone will fall back to a deep copy on objects
            that are not estimators.

        Returns
        -------
        estimator : object
            The deep copy of the input, an estimator if input is an estimator.

        Notes
        -----
        If the estimator's `random_state` parameter is an integer (or if the
        estimator doesn't have a `random_state` parameter), an *exact clone* is
        returned: the clone and the original estimator will give the exact same
        results. Otherwise, *statistical clone* is returned: the clone might
        return different results from the original estimator. More details can be
        found in :ref:`randomness`.
        """
        estimator_type = type(estimator)
        # XXX: not handling dictionaries
        if estimator_type in (list, tuple, set, frozenset):
            return estimator_type([clone(e, safe=safe) for e in estimator])
        elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
            if not safe:
                return copy.deepcopy(estimator)
            else:
                if isinstance(estimator, type):
                    raise TypeError(
                        "Cannot clone object. "
                        + "You should provide an instance of "
                        + "scikit-learn estimator instead of a class."
                    )
                else:
>                   raise TypeError(
                        "Cannot clone object '%s' (type %s): "
                        "it does not seem to be a scikit-learn "
                        "estimator as it does not implement a "
                        "'get_params' method." % (repr(estimator), type(estimator))
                    )
E                   TypeError: Cannot clone object '<imodels.tree.figs.FIGSRegressorCV object at 0x0000024E24343A00>' (type <class 'imodels.tree.figs.FIGSRegressorCV'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.

..\..\..\Anaconda3\envs\py310\lib\site-packages\sklearn\base.py:79: TypeError
______________________________________________ test_fit_regressor[regressor8] _______________________________________________

regressor = TaoTreeRegressor()

    @pytest.mark.parametrize("regressor", regressors)
    def test_fit_regressor(regressor) -> None:
        X, y = make_regression(n_samples=25, n_features=5)
        regressor_ = clone(regressor)
>       regressor_.fit(X, y)

tests\test_fit_print.py:60:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = TaoTreeRegressor()
X = array([[ 1.37335581e+00,  5.41363727e-01, -1.19638750e-01,
        -1.72968975e-01,  3.49329569e-01],
       [-1.55348...3.96825794e-01],
       [ 3.04033564e-01, -1.14044020e+00,  1.57034218e-01,
         1.33012401e+00,  1.06648724e-01]])
y = array([ 103.98582366,  -83.95949067,   23.68672904, -136.32004647,
        -86.99044435,    1.25016217,   65.46450731,... 188.81385195,   86.91833492,
        -77.45621107,   34.3125444 ,   92.98970577,  -43.27341573,
         17.77429275])
feature_names = ['X0', 'X1', 'X2', 'X3', 'X4'], sample_weight = None

    def fit(self, X, y=None, feature_names=None, sample_weight=None):
        """
        Params
        ------
        _sample_weight: array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Splits that would create child nodes with net zero or negative weight
            are ignored while searching for a split in each node.
        """
        X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
        if isinstance(self, RegressorMixin):
>           raise Warning('TAO Regression is not yet tested')
E           Warning: TAO Regression is not yet tested

imodels\tree\tao.py:115: Warning
_______________________________________ test_fit_before_print_classifier[classifier1] _______________________________________

classifier = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})

    @pytest.mark.parametrize("classifier", classifiers)
    def test_fit_before_print_classifier(classifier) -> None:
>       print(classifier)

tests\test_fit_print.py:65:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = CorelsClassifier ({'c': 0.01, 'n_iter': 10000, 'map_type': 'prefix', 'policy': 'lower_bound', 'verbosity': [], 'ablation': 0, 'max_card': 2, 'min_support': 0.01})

    def __str__(self):
        if corels_supported:
            if self.str_print is not None:
                return 'OptimalRuleList:\n\n' + self.str_print
            else:
>               return 'OptimalRuleList:\n\n' + self.rl_.__str__()
E               AttributeError: 'OptimalRuleListClassifier' object has no attribute 'rl_'

imodels\rule_list\corels_wrapper.py:240: AttributeError
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant