TypeError: unhashable type: 'numpy.ndarray' #86

miche2020 · 2022-01-28T18:54:15Z

Hi everyone!
I'm having an issue with scorecardpy: I get `TypeError: unhashable type: 'numpy.ndarray'` in
woebin.py, at the line `bins = dict(zip(xs, pool.starmap(woebin2, args)))`.
How can I fix this problem?

miche2020 · 2022-01-29T18:21:42Z

TypeError Traceback (most recent call last)
/var/folders/cz/xwjfsm6j63g54wbyx25npzqm0000gn/T/ipykernel_23001/37453442.py in <module>
----> 1 sc.woebin(test,y='y')

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin(dt, y, x, var_skip, breaks_list, special_values, stop_limit, count_distr_limit, bin_num_limit, positive, no_cores, print_step, method, ignore_const_cols, ignore_datetime_cols, check_cate_num, replace_blank, save_breaks_list, **kwargs)
971 print(('{:'+str(len(str(xs_len)))+'.0f}/{} {}').format(i, xs_len, x_i), flush=True)
972 # woebining on one variable
--> 973 bins[x_i] = woebin2(
974 dtm = pd.DataFrame({'y':dt[y], 'variable':x_i, 'value':dt[x_i]}),
975 breaks=breaks_list[x_i] if (breaks_list is not None) and (x_i in breaks_list.keys()) else None,

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin2(dtm, breaks, spl_val, init_count_distr, count_distr_limit, stop_limit, bin_num_limit, method)
735 if method == 'tree':
736 # 2.tree-like optimal binning
--> 737 bin_list = woebin2_tree(
738 dtm, init_count_distr=init_count_distr, count_distr_limit=count_distr_limit,
739 stop_limit=stop_limit, bin_num_limit=bin_num_limit, breaks=breaks, spl_val=spl_val)

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin2_tree(dtm, init_count_distr, count_distr_limit, stop_limit, bin_num_limit, breaks, spl_val)
511 binning_tree = None
512 while (IVchg >= stop_limit) and (step_num+1 <= min([bin_num_limit, len_brks])):
--> 513 binning_tree = woebin2_tree_add_1brkp(dtm, initial_binning, count_distr_limit, bestbreaks)
514 # best breaks
515 bestbreaks = binning_tree.loc[lambda x: x['bstbrkp'] != float('-inf'), 'bstbrkp'].tolist()

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in woebin2_tree_add_1brkp(dtm, initial_binning, count_distr_limit, bestbreaks)
455 dtm_rows = len(dtm.index)
456 # total_iv for all best breaks
--> 457 total_iv_all_brks = total_iv_all_breaks(initial_binning, bestbreaks, dtm_rows)
458 # bestbreaks: total_iv == max(total_iv) & min(count_distr) >= count_distr_limit
459 bstbrk_maxiv = total_iv_all_brks.loc[lambda x: x['min_count_distr'] >= count_distr_limit]

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/scorecardpy/woebin.py in total_iv_all_breaks(initial_binning, bestbreaks, dtm_rows)
408 init_bin_all_breaks.loc[:,'bstbin'+str(i)] = pd.cut(init_bin_all_breaks['brkp'], bestbreaks_i, right=False, labels=labels)#.astype(str)
409 # best break dt
--> 410 total_iv_all_brks = pd.melt(
411 init_bin_all_breaks, id_vars=["variable", "good", "bad"], var_name='bstbin',
412 value_vars=['bstbin'+str(i) for i in breaks_set])\

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
867
868 op = GroupByApply(self, func, args, kwargs)
--> 869 result = op.agg()
870 if not is_dict_like(func) and result is not None:
871 return result

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/apply.py in agg(self)
166
167 if is_dict_like(arg):
--> 168 return self.agg_dict_like()
169 elif is_list_like(arg):
170 # we require a list, but not a 'str'

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/apply.py in agg_dict_like(self)
473 else:
474 # key used for column selection and output
--> 475 results = {
476 key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
477 }

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/apply.py in <dictcomp>(.0)
474 # key used for column selection and output
475 results = {
--> 476 key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
477 }
478

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
279 cyfunc = com.get_cython_func(func)
280 if cyfunc and not args and not kwargs:
--> 281 return getattr(self, cyfunc)()
282
283 if self.grouper.nkeys > 1:

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in sum(self, numeric_only, min_count, engine, engine_kwargs)
2187 # _agg_general() returns. GH #31422
2188 with com.temp_setattr(self, "observed", True):
-> 2189 result = self._agg_general(
2190 numeric_only=numeric_only,
2191 min_count=min_count,

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in _agg_general(self, numeric_only, min_count, alias, npfunc)
1504 with self._group_selection_context():
1505 # try a cython aggregation if we can
-> 1506 result = self._cython_agg_general(
1507 how=alias,
1508 alt=npfunc,

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
1590 # TypeError -> we may have an exception in trying to aggregate
1591 # continue and exclude the block
-> 1592 new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
1593
1594 if not is_ser and len(new_mgr) < len(data):

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/internals/base.py in grouped_reduce(self, func, ignore_failures)
184
185 arr = self.array
--> 186 res = func(arr)
187 index = default_index(len(res))
188

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in array_func(values)
1576 def array_func(values: ArrayLike) -> ArrayLike:
1577 try:
-> 1578 result = self.grouper._cython_operation(
1579 "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
1580 )

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in _cython_operation(self, kind, values, how, axis, min_count, **kwargs)
935 cy_op = WrappedCythonOp(kind=kind, how=how)
936
--> 937 ids, _, _ = self.group_info
938 ngroups = self.ngroups
939 return cy_op.cython_operation(

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.get()

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in group_info(self)
832 @cache_readonly
833 def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
--> 834 comp_ids, obs_group_ids = self._get_compressed_codes()
835
836 ngroups = len(obs_group_ids)

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in _get_compressed_codes(self)
856 # The first returned ndarray may have any signed integer dtype
857 if len(self.groupings) > 1:
--> 858 group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
859 return compress_group_index(group_index, sort=self._sort)
860

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in codes(self)
791 @property
792 def codes(self) -> list[np.ndarray]:
--> 793 return [ping.codes for ping in self.groupings]
794
795 @property

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/ops.py in <listcomp>(.0)
791 @property
792 def codes(self) -> list[np.ndarray]:
--> 793 return [ping.codes for ping in self.groupings]
794
795 @property

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/grouper.py in codes(self)
620 return self._codes
621
--> 622 return self._codes_and_uniques[0]
623
624 @cache_readonly

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/_libs/properties.pyx in pandas._libs.properties.CachedProperty.get()

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/groupby/grouper.py in _codes_and_uniques(self)
688 else:
689 na_sentinel = -1
--> 690 codes, uniques = algorithms.factorize(
691 self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
692 )

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize(values, sort, na_sentinel, size_hint)
757 na_value = None
758
--> 759 codes, uniques = factorize_array(
760 values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value
761 )

/opt/anaconda3/envs/monedo_scoring_v3_2/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize_array(values, na_sentinel, size_hint, na_value, mask)
558
559 table = hash_klass(size_hint or len(values))
--> 560 uniques, codes = table.factorize(
561 values, na_sentinel=na_sentinel, na_value=na_value, mask=mask
562 )

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.factorize()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable._unique()

TypeError: unhashable type: 'numpy.ndarray'

pinkiesky · 2022-02-07T11:55:38Z

Hi, @miche2020. Downgrading to pandas==1.3.4 may help.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

TypeError: unhashable type: 'numpy.ndarray' #86

TypeError: unhashable type: 'numpy.ndarray' #86

miche2020 commented Jan 28, 2022

miche2020 commented Jan 29, 2022

pinkiesky commented Feb 7, 2022

TypeError: unhashable type: 'numpy.ndarray' #86

TypeError: unhashable type: 'numpy.ndarray' #86

Comments

miche2020 commented Jan 28, 2022

miche2020 commented Jan 29, 2022

pinkiesky commented Feb 7, 2022