-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
to apply different metadata to different output
- Loading branch information
1 parent
93167c1
commit a564496
Showing
18 changed files
with
413 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,303 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true, | ||
"init_cell": true, | ||
"ipub": { | ||
"ignore": true | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# load libraries\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"pd.set_option('display.latex.repr',True)\n", | ||
"pd.set_option('display.latex.longtable',False)\n", | ||
"pd.set_option('display.latex.escape',False)\n", | ||
"\n", | ||
"%matplotlib inline\n", | ||
"import matplotlib.pyplot as plt" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true, | ||
"ipub": { | ||
"ignore": true | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"df = pd.DataFrame({'A': [np.random.randint(0, 100) for i in range(1000)],\n", | ||
" 'B': [np.random.randint(0, 100) for i in range(1000)],\n", | ||
" 'C': [np.random.randint(0, 100) for i in range(1000)]})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true, | ||
"ipub": { | ||
"ignore": true | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from IPython.display import display\n", | ||
"from IPython.display import display_latex\n", | ||
"from IPython.display import display_markdown" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true, | ||
"ipub": { | ||
"ignore": true | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def print_describe(var):\n", | ||
" \n", | ||
" kind = var.dtype.name\n", | ||
" data = []\n", | ||
" value = []\n", | ||
"\n", | ||
" # Variable name\n", | ||
" data.append('Variable')\n", | ||
" value.append(var.name)\n", | ||
" # Data type\n", | ||
" data.append('Data type')\n", | ||
" value.append(var.dtype)\n", | ||
" # Data type name\n", | ||
" data.append('Data type name')\n", | ||
" value.append(var.dtype.name)\n", | ||
" # content\n", | ||
" data.append('Not NULL')\n", | ||
" value.append(var.count())\n", | ||
" # NULLs\n", | ||
" data.append('NULLs')\n", | ||
" value.append(len(var) - var.count())\n", | ||
"\n", | ||
" df = pd.DataFrame({'Data': data, 'Value': value},\n", | ||
" columns=['Data', 'Value'])\n", | ||
" display(df,metadata={'ipub':{\"table\":{\"caption\":False}}})\n", | ||
"\n", | ||
" return" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"ipub": { | ||
"ignore": true | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def print_top_bottom(var):\n", | ||
" \n", | ||
" vc = var.value_counts()\n", | ||
" if len(vc) == 0:\n", | ||
" return\n", | ||
"\n", | ||
" kind = var.dtype.name\n", | ||
" n = min(len(vc), 5)\n", | ||
" desc_n = range(n)\n", | ||
" desc_k = list(vc.keys())[:n]\n", | ||
" desc_v = list(vc)[:n]\n", | ||
"\n", | ||
" vc = var.value_counts(ascending=True)\n", | ||
" asc_n = range(len(vc), 0, -1)[:n]\n", | ||
" asc_k = list(vc.keys())[:n]\n", | ||
" asc_v = list(vc)[:n]\n", | ||
"\n", | ||
" df = pd.DataFrame({'TOP: Idx': desc_n, 'T.Data': desc_k, 'T.Count': desc_v,\n", | ||
" 'BOTTOM: Idx': asc_n, 'B.Data': asc_k, 'B.Count': asc_v},\n", | ||
" columns=['TOP: Idx', 'T.Data', 'T.Count', \n", | ||
" 'BOTTOM: Idx', 'B.Data', 'B.Count'])\n", | ||
" display(df,metadata={'ipub':{\"table\":{\"caption\":'Column %s' % var.name}}})\n", | ||
" return" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true, | ||
"ipub": { | ||
"ignore": true | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def print_bar_chart(var):\n", | ||
" \n", | ||
" ax = var.plot(figsize=(15, 5))\n", | ||
" display(ax.get_figure(),metadata={'ipub':{\"figure\":{\"caption\":'Column %s' % var.name}}})\n", | ||
" plt.close()\n", | ||
" return" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true, | ||
"ipub": { | ||
"ignore": true | ||
} | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def EDA_univariate(var):\n", | ||
" \n", | ||
" display_markdown('## Describe', raw=True)\n", | ||
" print_describe(var)\n", | ||
"\n", | ||
" display_markdown('## Most/Least common', raw=True)\n", | ||
" print_top_bottom(var)\n", | ||
"\n", | ||
" display_markdown('## Chart', raw=True)\n", | ||
" print_bar_chart(var)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Full Report\n", | ||
"For each column will have:\n", | ||
"\n", | ||
"- Title (markdown) \n", | ||
"- Details (pandas table) \n", | ||
"- Most/Least common (pandas table) \n", | ||
"- Chart (image/matplotlib) \n", | ||
"- **LaTeX** (`\\newpage`)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"ipub": { | ||
"equation": { | ||
"label": "abc" | ||
}, | ||
"figure": { | ||
"caption": "Figure caption.", | ||
"label": "figure:flabel", | ||
"placement": "H" | ||
}, | ||
"mkdown": true, | ||
"table": { | ||
"alternate": "gray!20", | ||
"caption": "An example of a table created with pandas dataframe.", | ||
"label": "table:example", | ||
"placement": "H" | ||
}, | ||
"text": { | ||
"asfloat": true, | ||
"caption": "", | ||
"format": { | ||
"basicstyle": "\\small" | ||
}, | ||
"label": "text:example_text", | ||
"placement": "H", | ||
"widefigure": false | ||
} | ||
}, | ||
"scrolled": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"for col_name in df.columns[0:3]:\n", | ||
" \n", | ||
" display_markdown('# Column: %s' % col_name, raw=True)\n", | ||
"\n", | ||
" col = df[col_name]\n", | ||
" kind = col.dtype.name\n", | ||
"\n", | ||
" EDA_univariate(col)\n", | ||
" \n", | ||
" display_markdown('\\\\newpage', raw=True)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"celltoolbar": "Edit Metadata", | ||
"hide_input": false, | ||
"ipub": { | ||
"listcode": false, | ||
"listfigures": true, | ||
"listtables": true, | ||
"titlepage": { | ||
"author": "The Author", | ||
"email": "professional@author.com", | ||
"institution": [ | ||
"Katra" | ||
], | ||
"subtitle": "360", | ||
"supervisors": [ | ||
"The Boss" | ||
], | ||
"tagline": "A tagline for the report.", | ||
"title": "Data Source" | ||
}, | ||
"toc": true | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.1" | ||
}, | ||
"toc": { | ||
"colors": { | ||
"hover_highlight": "#DAA520", | ||
"navigate_num": "#000000", | ||
"navigate_text": "#333333", | ||
"running_highlight": "#FF0000", | ||
"selected_highlight": "#FFD700", | ||
"sidebar_border": "#EEEEEE", | ||
"wrapper_background": "#FFFFFF" | ||
}, | ||
"moveMenuLeft": true, | ||
"nav_menu": { | ||
"height": "31px", | ||
"width": "252px" | ||
}, | ||
"navigate_menu": true, | ||
"number_sections": true, | ||
"sideBar": true, | ||
"threshold": 4, | ||
"toc_cell": false, | ||
"toc_section_display": "block", | ||
"toc_window_display": false, | ||
"widenNotebook": false | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = '0.6.0' | ||
__version__ = '0.6.1' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.