Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error: Found array with 0 feature(s) & tpot unsupported set of arguments #331

Open
krkaufma opened this issue Sep 1, 2020 · 5 comments
Open

Comments

@krkaufma
Copy link

krkaufma commented Sep 1, 2020

I am running the following software in a conda virtual env:
Ubuntu 18.04
Python 3.6.8
automatminer 1.0.3.20200727 (installed with pip, not from conda)

Both errors are encountered when fitting a MatPipe pipe in express or debug mode.

First, the array with 0 features issue:

ValueError: Found array with 0 feature(s) (shape=(25, 0)) while a minimum of 1 is required by RobustScaler.

The stack trace:

ValueError Traceback (most recent call last)
in
1 # Create MatPipe in 'express' mode for recommended settings
2 pipe = MatPipe.from_preset(preset="express", n_jobs=22)
----> 3 pipe.fit(df=train_df, target=target_name)

~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
–> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result

~/.local/lib/python3.6/site-packages/automatminer/pipeline.py in fit(self, df, target)
182 df = self.cleaner.fit_transform(df, target)
183 df = self.reducer.fit_transform(df, target)
–> 184 self.learner.fit(df, target)
185 logger.info(“MatPipe successfully fit.”)
186 self.post_fit_df = df

~/.local/lib/python3.6/site-packages/automatminer/utils/log.py in wrapper(*args, **kwargs)
94 self = args[0]
95 logger.info("{}Starting {}.".format(self._log_prefix, operation))
—> 96 result = meth(*args, **kwargs)
97 logger.info("{}Finished {}.".format(self._log_prefix, operation))
98 return result

~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
–> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result

~/.local/lib/python3.6/site-packages/automatminer/automl/adaptors.py in fit(self, df, target, **fit_kwargs)
135 self._features = df.drop(columns=target).columns.tolist()
136 self._fitted_target = target
–> 137 self._backend = self._backend.fit(X, y, **fit_kwargs)
138 return self
139

~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
744 # raise the exception if it’s our last attempt
745 if attempt == (attempts - 1):
–> 746 raise e
747 return self
748

~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
736
737 self._update_top_pipeline()
–> 738 self._summary_of_best_pipeline(features, target)
739 # Delete the temporary cache before exiting
740 self._cleanup_memory()

~/.local/lib/python3.6/site-packages/tpot/base.py in _summary_of_best_pipeline(self, features, target)
860 with warnings.catch_warnings():
861 warnings.simplefilter(‘ignore’)
–> 862 self.pareto_front_fitted_pipelines[str(pipeline)].fit(features, target)
863
864 def predict(self, features):

~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
348 This estimator
349 """
–> 350 Xt, fit_params = self._fit(X, y, **fit_params)
351 with _print_elapsed_time('Pipeline',
352 self._log_message(len(self.steps) - 1)):

~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
313 message_clsname=‘Pipeline’,
314 message=self._log_message(step_idx),
–> 315 **fit_params_steps[name])
316 # Replace the transformer of the step with the fitted
317 # transformer. This is necessary when loading the transformer

~/.local/lib/python3.6/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
563
564 def __call__(self, *args, **kwargs):
–> 565 return self._cached_call(args, kwargs)[0]
566
567 def __getstate__(self):

~/.local/lib/python3.6/site-packages/joblib/memory.py in _cached_call(self, args, kwargs, shelving)
529
530 if must_call:
–> 531 out, metadata = self.call(*args, **kwargs)
532 if self.mmap_mode is not None:
533 # Memmap the output at the first call to be consistent with

~/.local/lib/python3.6/site-packages/joblib/memory.py in call(self, *args, **kwargs)
725 if self._verbose > 0:
726 print(format_call(self.func, args, kwargs))
–> 727 output = self.func(*args, **kwargs)
728 self.store_backend.dump_item(
729 [func_id, args_id], output, verbose=self._verbose)

~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, ‘fit_transform’):
–> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)

~/.local/lib/python3.6/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
572 else:
573 # fit method of arity 2 (supervised transformation)
–> 574 return self.fit(X, y, **fit_params).transform(X)
575
576

~/.local/lib/python3.6/site-packages/sklearn/preprocessing/_data.py in fit(self, X, y)
1198 # the quantiles
1199 X = check_array(X, accept_sparse='csc', estimator=self,
-> 1200 dtype=FLOAT_DTYPES, force_all_finite='allow-nan')
1201
1202 q_min, q_max = self.quantile_range

~/.local/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
592 " a minimum of %d is required%s."
593 % (n_features, array.shape, ensure_min_features,
–> 594 context))
595
596 if warn_on_dtype and dtype_orig is not None and array.dtype != dtype_orig:

ValueError: Found array with 0 feature(s) (shape=(25, 0)) while a minimum of 1 is required by RobustScaler.

Second, the tpot unsupported set of arguments:
It appears to me that the genetic algorithm is attempting to pass invalid combinations of arguments to sklearn models. This occurs in the 'debug' and 'express' presets.

ValueError: Unsupported set of arguments: The combination of penalty='l2' and loss='epsilon_insensitive' are not supported when dual=False, Parameters: penalty='l2', loss='epsilon_insensitive', dual=False

The stack trace:

_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=1 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
_pre_test decorator: _random_mutation_operator: num_test=0 Found array with 0 feature(s) (shape=(22, 0)) while a minimum of 1 is required by MaxAbsScaler…
Pipeline encountered that has previously been evaluated during the optimization process. Using the score from the previous evaluation.

ValueError Traceback (most recent call last)
in
1 # Create MatPipe in 'debug' mode for quick test
2 dummy_pipe = MatPipe.from_preset(preset="debug", n_jobs=20)
----> 3 dummy_pipe.fit(df=train_df, target=target_name)

~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
–> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result

~/.local/lib/python3.6/site-packages/automatminer/pipeline.py in fit(self, df, target)
182 df = self.cleaner.fit_transform(df, target)
183 df = self.reducer.fit_transform(df, target)
–> 184 self.learner.fit(df, target)
185 logger.info(“MatPipe successfully fit.”)
186 self.post_fit_df = df

~/.local/lib/python3.6/site-packages/automatminer/utils/log.py in wrapper(*args, **kwargs)
94 self = args[0]
95 logger.info("{}Starting {}.".format(self._log_prefix, operation))
—> 96 result = meth(*args, **kwargs)
97 logger.info("{}Finished {}.".format(self._log_prefix, operation))
98 return result

~/.local/lib/python3.6/site-packages/automatminer/utils/pkg.py in wrapper(*args, **kwargs)
102 def wrapper(*args, **kwargs):
103 args[0].is_fit = False
–> 104 result = func(*args, **kwargs)
105 args[0].is_fit = True
106 return result

~/.local/lib/python3.6/site-packages/automatminer/automl/adaptors.py in fit(self, df, target, **fit_kwargs)
135 self._features = df.drop(columns=target).columns.tolist()
136 self._fitted_target = target
–> 137 self._backend = self._backend.fit(X, y, **fit_kwargs)
138 return self
139

~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
744 # raise the exception if it’s our last attempt
745 if attempt == (attempts - 1):
–> 746 raise e
747 return self
748

~/.local/lib/python3.6/site-packages/tpot/base.py in fit(self, features, target, sample_weight, groups)
736
737 self._update_top_pipeline()
–> 738 self._summary_of_best_pipeline(features, target)
739 # Delete the temporary cache before exiting
740 self._cleanup_memory()

~/.local/lib/python3.6/site-packages/tpot/base.py in _summary_of_best_pipeline(self, features, target)
860 with warnings.catch_warnings():
861 warnings.simplefilter(‘ignore’)
–> 862 self.pareto_front_fitted_pipelines[str(pipeline)].fit(features, target)
863
864 def predict(self, features):

~/.local/lib/python3.6/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
352 self._log_message(len(self.steps) - 1)):
353 if self._final_estimator != ‘passthrough’:
–> 354 self._final_estimator.fit(Xt, y, **fit_params)
355 return self
356

~/.local/lib/python3.6/site-packages/sklearn/svm/_classes.py in fit(self, X, y, sample_weight)
430 None, penalty, self.dual, self.verbose,
431 self.max_iter, self.tol, self.random_state, loss=self.loss,
–> 432 epsilon=self.epsilon, sample_weight=sample_weight)
433 self.coef_ = self.coef_.ravel()
434

~/.local/lib/python3.6/site-packages/sklearn/svm/_base.py in _fit_liblinear(X, y, C, fit_intercept, intercept_scaling, class_weight, penalty, dual, verbose, max_iter, tol, random_state, multi_class, loss, epsilon, sample_weight)
933 dtype=np.float64)
934
–> 935 solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
936 raw_coef_, n_iter_ = liblinear.train_wrap(
937 X, y_ind, sp.isspmatrix(X), solver_type, tol, bias, C,

~/.local/lib/python3.6/site-packages/sklearn/svm/_base.py in _get_liblinear_solver_type(multi_class, penalty, loss, dual)
791 raise ValueError('Unsupported set of arguments: %s, '
–> 792 'Parameters: penalty=%r, loss=%r, dual=%r'
793 % (error_string, penalty, loss, dual))
794
795

Original reports on matsci.org can be found here and here.

@gregheymans
Copy link

I have the same issue. Is there any solution now? :)

@jadelgadod
Copy link

@gregheymans, and @krkaufma. There is an update related to this issue, just uploaded on the automatminer forum. Not definitive, but it might be a partial solution to these kinds of issues.

@gregheymans
Copy link

gregheymans commented Nov 30, 2020 via email

@sgbaird
Copy link
Contributor

sgbaird commented Jul 8, 2021

Is this issue going to be resolved in the codebase?

@sgbaird
Copy link
Contributor

sgbaird commented Sep 11, 2021

@ardunn, curious if there's an update in the works.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants