Skip to content

Commit

Permalink
added split outputs preprocessor
Browse files Browse the repository at this point in the history
to apply different metadata to different output
  • Loading branch information
chrisjsewell committed Jul 26, 2017
1 parent 93167c1 commit a564496
Show file tree
Hide file tree
Showing 18 changed files with 413 additions and 22 deletions.
17 changes: 6 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ A workflow for creating and editing publication ready scientific reports and pre

![WorkFlow Example](/example_workflow.gif)

See [Example.ipynb](example/notebooks/Example.pdf), [Example.pdf](https://chrisjsewell.github.io/ipypublish/Example.view_pdf.html),
See [Example.ipynb](example/notebooks/Example.ipynb), [Example.pdf](https://chrisjsewell.github.io/ipypublish/Example.view_pdf.html),
[Example.html](https://chrisjsewell.github.io/ipypublish/Example.html) and
[Example.slides.html](https://chrisjsewell.github.io/ipypublish/Example.slides.html#/) for an example of the potential input/output.

Expand Down Expand Up @@ -156,9 +156,6 @@ This leads to the following logic flow (discussed further in the [Metadata Tags]
- "mkdown" for markdown text
- "text" for plain text

Note that this is principally envisioned for use with **one output per code cell**, but it will work in a limited capacity for multiple outputs (e.g. you will not be able to specify separate specificaions, like captions).
[TO COME: using `IPython.display(obj,metadata={"ipub":{}})` to provide specifications for individual outputs]

Packages such as pandas and matplotlib use the Jupyter notebook's [rich representation](http://ipython.readthedocs.io/en/stable/config/integrating.html#rich-display) mechanics to store a single output in multiple formats. nbconvert (and hence ipypublish) then selects only the highest-priority (compatible) format to be output. This allows, for example, pandas DataFrames to be output as
latex tables in latex documents and html tables in html documents/slides.

Expand Down Expand Up @@ -303,21 +300,19 @@ test

## Metadata Tags

All information additional information, used to specify how a particular notebook/cell will be represented
when converted, is stored in the metadata under:
All additional information, used to specify how a particular notebook/cell/output will be represented when converted, is stored in the metadata under:

```json
{
"ipub": {}
}
```

To access metadata, in the Jupyter Notebook Toolbar:

- For notebook level: go to Edit -> Edit Notebook Metadata
- For cell level: go to View -> Cell Toolbar -> Edit Metadata and a button will appear above each cell.
[TO COME: output level, using `IPython.display(obj,metadata={"ipub":{}})`]
There are three levels of metadata:

- For notebook level: in the Jupyter Notebook Toolbar go to Edit -> Edit Notebook Metadata
- For cell level: in the Jupyter Notebook Toolbar go to View -> Cell Toolbar -> Edit Metadata and a button will appear above each cell.
- For output level: using `IPython.display.display(obj,metadata={"ipub":{}})`, you can set metadata specific to a certain output. Options set at the output level will override options set at the cell level. For an example of this, run the [MultiOutput_Example.ipynb](example/notebooks/MultiOutput_Example.ipynb) notebook.

**Please note**, setting a value to `"value":{}` is the same as `"value":false` so,
if you are not setting additional options, use `"value":true`.
Expand Down
303 changes: 303 additions & 0 deletions example/notebooks/MultiOutput_Example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"init_cell": true,
"ipub": {
"ignore": true
}
},
"outputs": [],
"source": [
"# load libraries\n",
"import numpy as np\n",
"import pandas as pd\n",
"pd.set_option('display.latex.repr',True)\n",
"pd.set_option('display.latex.longtable',False)\n",
"pd.set_option('display.latex.escape',False)\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"ipub": {
"ignore": true
}
},
"outputs": [],
"source": [
"df = pd.DataFrame({'A': [np.random.randint(0, 100) for i in range(1000)],\n",
" 'B': [np.random.randint(0, 100) for i in range(1000)],\n",
" 'C': [np.random.randint(0, 100) for i in range(1000)]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"ipub": {
"ignore": true
}
},
"outputs": [],
"source": [
"from IPython.display import display\n",
"from IPython.display import display_latex\n",
"from IPython.display import display_markdown"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"ipub": {
"ignore": true
}
},
"outputs": [],
"source": [
"def print_describe(var):\n",
" \n",
" kind = var.dtype.name\n",
" data = []\n",
" value = []\n",
"\n",
" # Variable name\n",
" data.append('Variable')\n",
" value.append(var.name)\n",
" # Data type\n",
" data.append('Data type')\n",
" value.append(var.dtype)\n",
" # Data type name\n",
" data.append('Data type name')\n",
" value.append(var.dtype.name)\n",
" # content\n",
" data.append('Not NULL')\n",
" value.append(var.count())\n",
" # NULLs\n",
" data.append('NULLs')\n",
" value.append(len(var) - var.count())\n",
"\n",
" df = pd.DataFrame({'Data': data, 'Value': value},\n",
" columns=['Data', 'Value'])\n",
" display(df,metadata={'ipub':{\"table\":{\"caption\":False}}})\n",
"\n",
" return"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ipub": {
"ignore": true
}
},
"outputs": [],
"source": [
"def print_top_bottom(var):\n",
" \n",
" vc = var.value_counts()\n",
" if len(vc) == 0:\n",
" return\n",
"\n",
" kind = var.dtype.name\n",
" n = min(len(vc), 5)\n",
" desc_n = range(n)\n",
" desc_k = list(vc.keys())[:n]\n",
" desc_v = list(vc)[:n]\n",
"\n",
" vc = var.value_counts(ascending=True)\n",
" asc_n = range(len(vc), 0, -1)[:n]\n",
" asc_k = list(vc.keys())[:n]\n",
" asc_v = list(vc)[:n]\n",
"\n",
" df = pd.DataFrame({'TOP: Idx': desc_n, 'T.Data': desc_k, 'T.Count': desc_v,\n",
" 'BOTTOM: Idx': asc_n, 'B.Data': asc_k, 'B.Count': asc_v},\n",
" columns=['TOP: Idx', 'T.Data', 'T.Count', \n",
" 'BOTTOM: Idx', 'B.Data', 'B.Count'])\n",
" display(df,metadata={'ipub':{\"table\":{\"caption\":'Column %s' % var.name}}})\n",
" return"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"ipub": {
"ignore": true
}
},
"outputs": [],
"source": [
"def print_bar_chart(var):\n",
" \n",
" ax = var.plot(figsize=(15, 5))\n",
" display(ax.get_figure(),metadata={'ipub':{\"figure\":{\"caption\":'Column %s' % var.name}}})\n",
" plt.close()\n",
" return"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"ipub": {
"ignore": true
}
},
"outputs": [],
"source": [
"def EDA_univariate(var):\n",
" \n",
" display_markdown('## Describe', raw=True)\n",
" print_describe(var)\n",
"\n",
" display_markdown('## Most/Least common', raw=True)\n",
" print_top_bottom(var)\n",
"\n",
" display_markdown('## Chart', raw=True)\n",
" print_bar_chart(var)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Full Report\n",
"For each column will have:\n",
"\n",
"- Title (markdown) \n",
"- Details (pandas table) \n",
"- Most/Least common (pandas table) \n",
"- Chart (image/matplotlib) \n",
"- **LaTeX** (`\\newpage`)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ipub": {
"equation": {
"label": "abc"
},
"figure": {
"caption": "Figure caption.",
"label": "figure:flabel",
"placement": "H"
},
"mkdown": true,
"table": {
"alternate": "gray!20",
"caption": "An example of a table created with pandas dataframe.",
"label": "table:example",
"placement": "H"
},
"text": {
"asfloat": true,
"caption": "",
"format": {
"basicstyle": "\\small"
},
"label": "text:example_text",
"placement": "H",
"widefigure": false
}
},
"scrolled": false
},
"outputs": [],
"source": [
"for col_name in df.columns[0:3]:\n",
" \n",
" display_markdown('# Column: %s' % col_name, raw=True)\n",
"\n",
" col = df[col_name]\n",
" kind = col.dtype.name\n",
"\n",
" EDA_univariate(col)\n",
" \n",
" display_markdown('\\\\newpage', raw=True)"
]
}
],
"metadata": {
"celltoolbar": "Edit Metadata",
"hide_input": false,
"ipub": {
"listcode": false,
"listfigures": true,
"listtables": true,
"titlepage": {
"author": "The Author",
"email": "professional@author.com",
"institution": [
"Katra"
],
"subtitle": "360",
"supervisors": [
"The Boss"
],
"tagline": "A tagline for the report.",
"title": "Data Source"
},
"toc": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"navigate_num": "#000000",
"navigate_text": "#333333",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700",
"sidebar_border": "#EEEEEE",
"wrapper_background": "#FFFFFF"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "31px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 1 addition & 1 deletion ipypublish/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.6.0'
__version__ = '0.6.1'
4 changes: 3 additions & 1 deletion ipypublish/export_plugins/html_ipypublish_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ipypublish.preprocessors.latextags_to_html import LatexTagsToHTML
from ipypublish.filters.replace_string import replace_string
from ipypublish.preprocessors.latex_doc_defaults import MetaDefaults
from ipypublish.preprocessors.split_outputs import SplitOutputs

cell_defaults = {
"ipub": {
Expand Down Expand Up @@ -51,7 +52,8 @@
oformat = 'HTML'
config = {'TemplateExporter.filters':{'replace_string':replace_string},
'Exporter.filters':{'replace_string':replace_string},
'Exporter.preprocessors':[MetaDefaults,LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
'Exporter.preprocessors':[MetaDefaults,SplitOutputs,LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
'SplitOutputs.split':True,
'MetaDefaults.cell_defaults':cell_defaults,
'MetaDefaults.nb_defaults':nb_defaults,
'LatexCaptions.add_prefix':True}
Expand Down
4 changes: 3 additions & 1 deletion ipypublish/export_plugins/html_ipypublish_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
from ipypublish.preprocessors.latex_doc_html import LatexDocHTML
from ipypublish.preprocessors.latextags_to_html import LatexTagsToHTML
from ipypublish.filters.replace_string import replace_string
from ipypublish.preprocessors.split_outputs import SplitOutputs

oformat = 'HTML'
config = {'TemplateExporter.filters':{'replace_string':replace_string},
'Exporter.filters':{'replace_string':replace_string},
'Exporter.preprocessors':[LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
'Exporter.preprocessors':[SplitOutputs,LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
'SplitOutputs.split':True,
'LatexCaptions.add_prefix':True}

template = create_tpl([
Expand Down

0 comments on commit a564496

Please sign in to comment.