Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TypeError: unhashable type: 'numpy.ndarray' #86

Open
miche2020 opened this issue Jan 28, 2022 · 2 comments
Open

TypeError: unhashable type: 'numpy.ndarray' #86

miche2020 opened this issue Jan 28, 2022 · 2 comments

Comments

@miche2020
Copy link

Hi everyone!
I am having an issue with scorecardpy: I get the error TypeError: unhashable type: 'numpy.ndarray' in
woebin.py, at the line bins = dict(zip(xs, pool.starmap(woebin2, args))).
How can I fix this problem?

@miche2020
Copy link
Author

TypeError Traceback (most recent call last)
/var/folders/cz/xwjfsm6j63g54wbyx25npzqm0000gn/T/ipykernel_23001/37453442.py in <module>
----> 1 sc.woebin(test,y='y')

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin(dt, y, x, var_skip, breaks_list, special_values, stop_limit, count_distr_limit, bin_num_limit, positive, no_cores, print_step, method, ignore_const_cols, ignore_datetime_cols, check_cate_num, replace_blank, save_breaks_list, **kwargs)
971 print(('{:'+str(len(str(xs_len)))+'.0f}/{} {}').format(i, xs_len, x_i), flush=True)
972 # woebining on one variable
--> 973 bins[x_i] = woebin2(
974 dtm = pd.DataFrame({'y':dt[y], 'variable':x_i, 'value':dt[x_i]}),
975 breaks=breaks_list[x_i] if (breaks_list is not None) and (x_i in breaks_list.keys()) else None,

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin2(dtm, breaks, spl_val, init_count_distr, count_distr_limit, stop_limit, bin_num_limit, method)
735 if method == 'tree':
736 # 2.tree-like optimal binning
--> 737 bin_list = woebin2_tree(
738 dtm, init_count_distr=init_count_distr, count_distr_limit=count_distr_limit,
739 stop_limit=stop_limit, bin_num_limit=bin_num_limit, breaks=breaks, spl_val=spl_val)

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin2_tree(dtm, init_count_distr, count_distr_limit, stop_limit, bin_num_limit, breaks, spl_val)
511 binning_tree = None
512 while (IVchg >= stop_limit) and (step_num+1 <= min([bin_num_limit, len_brks])):
--> 513 binning_tree = woebin2_tree_add_1brkp(dtm, initial_binning, count_distr_limit, bestbreaks)
514 # best breaks
515 bestbreaks = binning_tree.loc[lambda x: x['bstbrkp'] != float('-inf'), 'bstbrkp'].tolist()

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin2_tree_add_1brkp(dtm, initial_binning, count_distr_limit, bestbreaks)
455 dtm_rows = len(dtm.index)
456 # total_iv for all best breaks
--> 457 total_iv_all_brks = total_iv_all_breaks(initial_binning, bestbreaks, dtm_rows)
458 # bestbreaks: total_iv == max(total_iv) & min(count_distr) >= count_distr_limit
459 bstbrk_maxiv = total_iv_all_brks.loc[lambda x: x['min_count_distr'] >= count_distr_limit]

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in total_iv_all_breaks(initial_binning, bestbreaks, dtm_rows)
408 init_bin_all_breaks.loc[:,'bstbin'+str(i)] = pd.cut(init_bin_all_breaks['brkp'], bestbreaks_i, right=False, labels=labels)#.astype(str)
409 # best break dt
--> 410 total_iv_all_brks = pd.melt(
411 init_bin_all_breaks, id_vars=["variable", "good", "bad"], var_name='bstbin',
412 value_vars=['bstbin'+str(i) for i in breaks_set])\

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
867
868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
870 if not is_dict_like(func) and result is not None:
871 return result

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/apply.py in agg(self)
166
167 if is_dict_like(arg):
--> 168 return self.agg_dict_like()
169 elif is_list_like(arg):
170 # we require a list, but not a 'str'

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/apply.py in agg_dict_like(self)
473 else:
474 # key used for column selection and output
--> 475 results = {
476 key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
477 }

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/apply.py in <dictcomp>(.0)
474 # key used for column selection and output
475 results = {
--> 476 key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
477 }
478

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
279 cyfunc = com.get_cython_func(func)
280 if cyfunc and not args and not kwargs:
--> 281 return getattr(self, cyfunc)()
282
283 if self.grouper.nkeys > 1:

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in sum(self, numeric_only, min_count, engine, engine_kwargs)
2187 # _agg_general() returns. GH #31422
2188 with com.temp_setattr(self, "observed", True):
-> 2189 result = self._agg_general(
2190 numeric_only=numeric_only,
2191 min_count=min_count,

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in _agg_general(self, numeric_only, min_count, alias, npfunc)
1504 with self._group_selection_context():
1505 # try a cython aggregation if we can
-> 1506 result = self._cython_agg_general(
1507 how=alias,
1508 alt=npfunc,

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
1590 # TypeError -> we may have an exception in trying to aggregate
1591 # continue and exclude the block
-> 1592 new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
1593
1594 if not is_ser and len(new_mgr) < len(data):

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/internals/base.py in grouped_reduce(self, func, ignore_failures)
184
185 arr = self.array
--> 186 res = func(arr)
187 index = default_index(len(res))
188

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in array_func(values)
1576 def array_func(values: ArrayLike) -> ArrayLike:
1577 try:
-> 1578 result = self.grouper._cython_operation(
1579 "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
1580 )

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in _cython_operation(self, kind, values, how, axis, min_count, **kwargs)
935 cy_op = WrappedCythonOp(kind=kind, how=how)
936
--> 937 ids, _, _ = self.group_info
938 ngroups = self.ngroups
939 return cy_op.cython_operation(

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.get()

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in group_info(self)
832 @cache_readonly
833 def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
--> 834 comp_ids, obs_group_ids = self._get_compressed_codes()
835
836 ngroups = len(obs_group_ids)

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in _get_compressed_codes(self)
856 # The first returned ndarray may have any signed integer dtype
857 if len(self.groupings) > 1:
--> 858 group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
859 return compress_group_index(group_index, sort=self._sort)
860

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in codes(self)
791 @property
792 def codes(self) -> list[np.ndarray]:
--> 793 return [ping.codes for ping in self.groupings]
794
795 @property

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in <listcomp>(.0)
791 @property
792 def codes(self) -> list[np.ndarray]:
--> 793 return [ping.codes for ping in self.groupings]
794
795 @property

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/grouper.py in codes(self)
620 return self._codes
621
--> 622 return self._codes_and_uniques[0]
623
624 @cache_readonly

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.get()

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/grouper.py in _codes_and_uniques(self)
688 else:
689 na_sentinel = -1
--> 690 codes, uniques = algorithms.factorize(
691 self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
692 )

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize(values, sort, na_sentinel, size_hint)
757 na_value = None
758
--> 759 codes, uniques = factorize_array(
760 values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value
761 )

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize_array(values, na_sentinel, size_hint, na_value, mask)
558
559 table = hash_klass(size_hint or len(values))
--> 560 uniques, codes = table.factorize(
561 values, na_sentinel=na_sentinel, na_value=na_value, mask=mask
562 )

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.factorize()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable._unique()

TypeError: unhashable type: 'numpy.ndarray'

@pinkiesky
Copy link

Hi, @miche2020. Downgrading to pandas==1.3.4 may help.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants