You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
When I use HyperoptEstimator to train the model, I want to calculate logloss, so I manually set several classifiers to support predict_proba, but it reports an error: `ValueError: Found input variables with inconsistent numbers of samples: [1818794, 79078]`.
What is the reason?
Following is my code:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from hpsklearn import sklearn_RandomForestClassifier,extra_trees,random_forest,knn,svc
from hyperopt import hp
from random import choice
def algorithms(name):
classifiers = [
~/anaconda3/hyperopt-sklearn/hpsklearn/estimator.py in fit(self, X, y, EX_list, valid_size, n_folds, cv_shuffle, warm_start, random_state, weights)
778 increment = min(self.fit_increment,
779 adjusted_max_evals - len(self.trials.trials))
--> 780 fit_iter.send(increment)
781 if filename is not None:
782 with open(filename, 'wb') as dump_file:
~/anaconda3/hyperopt-sklearn/hpsklearn/estimator.py in fit_iter(self, X, y, EX_list, valid_size, n_folds, cv_shuffle, warm_start, random_state, weights, increment)
688 # so we notice them.
689 catch_eval_exceptions=False,
--> 690 return_argmin=False, # -- in case no success so far
691 )
692 else:
~/anaconda3/lib/python3.7/site-packages/hyperopt/fmin.py in run(self, N, block_until_done)
225 else:
226 # -- loop over trials and do the jobs directly
--> 227 self.serial_evaluate()
228
229 try:
~/anaconda3/lib/python3.7/site-packages/hyperopt/fmin.py in serial_evaluate(self, N)
139 ctrl = base.Ctrl(self.trials, current_trial=trial)
140 try:
--> 141 result = self.domain.evaluate(spec, ctrl)
142 except Exception as e:
143 logger.info('job exception: %s' % str(e))
~/anaconda3/hyperopt-sklearn/hpsklearn/estimator.py in fn_with_timeout(*args, **kwargs)
651 assert fn_rval[0] in ('raise', 'return')
652 if fn_rval[0] == 'raise':
--> 653 raise fn_rval[1]
654
655 # -- remove potentially large objects from the rval
ValueError: Found input variables with inconsistent numbers of samples: [1818794, 79078]
The text was updated successfully, but these errors were encountered:
Sounds like the shapes of your labels and predictions are not in alignment. I faced a similar problem while fitting a regression model. The problem in my case was that the number of rows in X was not equal to the number of rows in y. You likely get problems because you remove rows containing nulls in X_train and y_train independently of each other. y_train probably has few, or no, nulls, while X_train probably has some. So when you remove a row in X_train and the same row is not removed in y_train, your data becomes unsynced and the two have different lengths. Instead, you should remove nulls before you separate X and y.
In most cases, x is your feature matrix and y is your target. But your feature matrix should not be 1D, so check the shape of x and, if it is 1D, convert it from 1D to 2D (e.g. reshape it to a single column).
When I use HyperoptEstimator to train the model, I want to calculate logloss, so I manually set several classifiers to support predict_proba, but it reports an error: `ValueError: Found input variables with inconsistent numbers of samples: [1818794, 79078]`.
What is the reason?
Following is my code:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from hpsklearn import sklearn_RandomForestClassifier,extra_trees,random_forest,knn,svc
from hyperopt import hp
from random import choice
def algorithms(name):
classifiers = [
kf = StratifiedKFold(n_splits=5,shuffle=True)
# use the sigmoid kernel to train the model
Hyper_logloss = []
for train_index, test_index in kf.split(X,y):
X_train, X_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]
estim = HyperoptEstimator(classifier=algorithms("clf"),algo=tpe.suggest,loss_fn=log_loss,continuous_loss_fn=True,trial_timeout=3600)
estim.fit(X_train, y_train)
ValueError Traceback (most recent call last)
in
19 y_train, y_test = y.iloc[train_index], y.iloc[test_index]
20 estim = HyperoptEstimator(classifier=algorithms("clf"),algo=tpe.suggest,loss_fn=log_loss,continuous_loss_fn=True,trial_timeout=3600)
---> 21 estim.fit(X_train, y_train)
~/anaconda3/hyperopt-sklearn/hpsklearn/estimator.py in fit(self, X, y, EX_list, valid_size, n_folds, cv_shuffle, warm_start, random_state, weights)
778 increment = min(self.fit_increment,
779 adjusted_max_evals - len(self.trials.trials))
--> 780 fit_iter.send(increment)
781 if filename is not None:
782 with open(filename, 'wb') as dump_file:
~/anaconda3/hyperopt-sklearn/hpsklearn/estimator.py in fit_iter(self, X, y, EX_list, valid_size, n_folds, cv_shuffle, warm_start, random_state, weights, increment)
688 # so we notice them.
689 catch_eval_exceptions=False,
--> 690 return_argmin=False, # -- in case no success so far
691 )
692 else:
~/anaconda3/lib/python3.7/site-packages/hyperopt/fmin.py in fmin(fn, space, algo, max_evals, trials, rstate, allow_trials_fmin, pass_expr_memo_ctrl, catch_eval_exceptions, verbose, return_argmin, points_to_evaluate, max_queue_len, show_progressbar)
386 catch_eval_exceptions=catch_eval_exceptions,
387 return_argmin=return_argmin,
--> 388 show_progressbar=show_progressbar,
389 )
390
~/anaconda3/lib/python3.7/site-packages/hyperopt/base.py in fmin(self, fn, space, algo, max_evals, rstate, verbose, pass_expr_memo_ctrl, catch_eval_exceptions, return_argmin, show_progressbar)
637 catch_eval_exceptions=catch_eval_exceptions,
638 return_argmin=return_argmin,
--> 639 show_progressbar=show_progressbar)
640
641
~/anaconda3/lib/python3.7/site-packages/hyperopt/fmin.py in fmin(fn, space, algo, max_evals, trials, rstate, allow_trials_fmin, pass_expr_memo_ctrl, catch_eval_exceptions, verbose, return_argmin, points_to_evaluate, max_queue_len, show_progressbar)
405 show_progressbar=show_progressbar)
406 rval.catch_eval_exceptions = catch_eval_exceptions
--> 407 rval.exhaust()
408 if return_argmin:
409 return trials.argmin
~/anaconda3/lib/python3.7/site-packages/hyperopt/fmin.py in exhaust(self)
260 def exhaust(self):
261 n_done = len(self.trials)
--> 262 self.run(self.max_evals - n_done, block_until_done=self.asynchronous)
263 self.trials.refresh()
264 return self
~/anaconda3/lib/python3.7/site-packages/hyperopt/fmin.py in run(self, N, block_until_done)
225 else:
226 # -- loop over trials and do the jobs directly
--> 227 self.serial_evaluate()
228
229 try:
~/anaconda3/lib/python3.7/site-packages/hyperopt/fmin.py in serial_evaluate(self, N)
139 ctrl = base.Ctrl(self.trials, current_trial=trial)
140 try:
--> 141 result = self.domain.evaluate(spec, ctrl)
142 except Exception as e:
143 logger.info('job exception: %s' % str(e))
~/anaconda3/lib/python3.7/site-packages/hyperopt/base.py in evaluate(self, config, ctrl, attach_attachments)
842 memo=memo,
843 print_node_on_error=self.rec_eval_print_node_on_error)
--> 844 rval = self.fn(pyll_rval)
845
846 if isinstance(rval, (float, int, np.number)):
~/anaconda3/hyperopt-sklearn/hpsklearn/estimator.py in fn_with_timeout(*args, **kwargs)
651 assert fn_rval[0] in ('raise', 'return')
652 if fn_rval[0] == 'raise':
--> 653 raise fn_rval[1]
654
655 # -- remove potentially large objects from the rval
ValueError: Found input variables with inconsistent numbers of samples: [1818794, 79078]
The text was updated successfully, but these errors were encountered: