New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bug: Pivot fails for MultiIndex If existing index is used. #23955
Comments
Any updates on this? As I understand, currently the As of now, I solve this in a hacky way by generating a single index as a concatenation of the multiple levels of the original indices, pivot and then reconstructing the different levels of the MultiIndex by splitting the concatenated single Index. Following on @srajanpaliwal example: (df.reset_index()
.assign(new_index=lambda dd: dd['index_1'].str.cat(dd['index_2'], sep='_'))
.pivot(index='new_index', columns='label', values='value')
.assign(index_1=lambda dd: dd.index.str.split('_').str.get(0),
index_2=lambda dd: dd.index.str.split('_').str.get(1))
.set_index(['index_1', 'index_2'])) Output:
Either way, is there a reason why MultiIndex is not accepted with the |
Thanks for the solution #23955 (comment). If it saves someone the trouble, here is a generalization
|
slight adjustment of @gmacario comment for the sake of uniformity with the pivot api def multiindex_pivot(df, index=None, columns=None, values=None):
#https://github.com/pandas-dev/pandas/issues/23955
if index is None:
names = list(df.index.names)
df = df.reset_index()
else:
names = index
list_index = df[names].values
tuples_index = [tuple(i) for i in list_index] # hashable
df = df.assign(tuples_index=tuples_index)
df = df.pivot(index="tuples_index", columns=columns, values=values)
tuples_index = df.index # reduced
index = pd.MultiIndex.from_tuples(tuples_index, names=names)
df.index = index
return df usage: df.pipe(multiindex_pivot, index=['idx_column1', 'idx_column2'], columns='foo', values='bar') |
another slight enhancement that allows multiple def multiindex_pivot(df, index=None, columns=None, values=None):
# https://github.com/pandas-dev/pandas/issues/23955
if index is None:
names = list(df.index.names)
df = df.reset_index()
else:
names = index
df = df.assign(tuples_index=[tuple(i) for i in df[names].values]) # hashable
df = df.assign(tuples_columns=[tuple(i) for i in df[columns].values]) # hashable
df = df.pivot(index='tuples_index', columns='tuples_columns', values=values)
df.index = pd.MultiIndex.from_tuples(df.index, names=names) # reduced
df.columns = pd.MultiIndex.from_tuples(df.columns, names=columns) # reduced
return df usage: df.pipe(multiindex_pivot,
index=['idx_column1', 'idx_column2'],
columns=['col_column1', 'col_column2'],
values='bar') |
Yet another slightly improved version: def MultiIndex_pivot(df:pd.DataFrame,
index:str = None,
columns:str = None,
values:str = None) -> pd.DataFrame:
"""
https://github.com/pandas-dev/pandas/issues/23955
Usage:
df.pipe(MultiIndex_pivot, index = ['idx_column1', 'idx_column2'], columns = ['col_column1', 'col_column2'], values = 'bar')
"""
output_df = df.copy(deep = True)
if index is None:
names = list(output_df.index.names)
output_df.reset_index(drop = True, inplace = True)
else:
names = index
output_df = output_df.assign(tuples_index = [tuple(i) for i in output_df[names].values])
if isinstance(columns, list):
output_df = output_df.assign(tuples_columns = [tuple(i) for i in output_df[columns].values])
output_df = output_df.pivot(index = 'tuples_index', columns = 'tuples_columns', values = values)
output_df.columns = pd.MultiIndex.from_tuples([((x[0],) + x[1]) for x in output_df.columns], names = [None] + columns)
else:
output_df = output_df.pivot(index = 'tuples_index', columns = columns, values = values)
output_df.index = pd.MultiIndex.from_tuples(output_df.index, names = names)
return output_df Usage: df.pipe(MultiIndex_pivot,
index = ['idx_column1', 'idx_column2'],
columns = ['col_column1', 'col_column2'],
values = 'bar') Still, since pandas version 1.1.0 index and column lists are supported by the regular DataFrame.pivot . |
Code Sample, a copy-pastable example
Problem description
Pivot function give an error
NotImplementedError: isna is not defined for MultiIndex
. When index is set toNone
.Expected Output
Output of
pd.show_versions()
pandas: 0.23.4
pytest: 3.5.1
pip: 10.0.1
setuptools: 39.1.0
Cython: 0.28.2
numpy: 1.15.4
scipy: 1.1.0
pyarrow: None
xarray: None
IPython: 6.4.0
sphinx: 1.7.4
patsy: 0.5.0
dateutil: 2.7.3
pytz: 2018.4
blosc: None
bottleneck: 1.2.1
tables: 3.4.3
numexpr: 2.6.5
feather: None
matplotlib: 2.2.2
openpyxl: 2.5.3
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.4
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.7
pymysql: None
psycopg2: None
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None
The text was updated successfully, but these errors were encountered: