added split outputs preprocessor

to apply different metadata to different outputs
chrisjsewell · Jul 26, 2017 · a564496 · a564496
1 parent 93167c1
commit a564496
Show file tree

Hide file tree

Showing 18 changed files with 413 additions and 22 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ A workflow for creating and editing publication ready scientific reports and pre
 
 ![WorkFlow Example](/example_workflow.gif)
 
-See [Example.ipynb](example/notebooks/Example.pdf), [Example.pdf](https://chrisjsewell.github.io/ipypublish/Example.view_pdf.html),
+See [Example.ipynb](example/notebooks/Example.ipynb), [Example.pdf](https://chrisjsewell.github.io/ipypublish/Example.view_pdf.html),
 [Example.html](https://chrisjsewell.github.io/ipypublish/Example.html) and 
 [Example.slides.html](https://chrisjsewell.github.io/ipypublish/Example.slides.html#/) for an example of the potential input/output.
 
@@ -156,9 +156,6 @@ This leads to the following logic flow (discussed further in the [Metadata Tags]
 	- "mkdown" for markdown text
 	- "text" for plain text
 
-Note that this is principally envisioned for use with **one output per code cell**, but it will work in a limited capacity for multiple outputs (e.g. you will not be able to specify separate specificaions, like captions). 
-[TO COME: using `IPython.display(obj,metadata={"ipub":{}})` to provide specifications for individual outputs]
-
 Packages, such as pandas and matplotlib, use jupyter notebooks [rich representation](http://ipython.readthedocs.io/en/stable/config/integrating.html#rich-display) mechanics to store a single output in multiple formats. nbconvert (and hence ipypublish) then selects only the highest priority (compatible) format to be output. This allows, for example, for pandas DataFrames to be output as 
 latex tables in latex documents and html tables in html documents/slides.
 
@@ -303,21 +300,19 @@ test
 
 ## Metadata Tags
 
-All information additional information, used to specify how a particular notebook/cell will be represented
-when converted, is stored in the metadata under:
+All additional information, used to specify how a particular notebook/cell/output will be represented when converted, is stored in the metadata under:
 
 ```json
 {
 		"ipub": {}
 }
 ```
 
-To access metadata, in the Jupyter Notebook Toolbar:
-
-- For notebook level: go to Edit -> Edit Notebook Metadata
-- For cell level: go to View -> Cell Toolbar -> Edit Metadata and a button will appear above each cell.
-[TO COME: output level, using `IPython.display(obj,metadata={"ipub":{}})`]
+There are three levels of metadata:
 
+- For notebook level: in the Jupyter Notebook Toolbar go to Edit -> Edit Notebook Metadata
+- For cell level: in the Jupyter Notebook Toolbar go to View -> Cell Toolbar -> Edit Metadata and a button will appear above each cell.
+- For output level: using `IPython.display.display(obj, metadata={"ipub":{}})`, you can set metadata specific to a certain output. Options set at the output level will override options set at the cell level. For an example of this, run the [MultiOutput_Example.ipynb](example/notebooks/MultiOutput_Example.ipynb).
 
 **Please note**, setting a value to `"value":{}` is the same as `"value":false` so,
 if you are not setting additional options, use `"value":true`.

diff --git a/example/notebooks/MultiOutput_Example.ipynb b/example/notebooks/MultiOutput_Example.ipynb
@@ -0,0 +1,303 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "init_cell": true,
+    "ipub": {
+     "ignore": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# load libraries\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "pd.set_option('display.latex.repr',True)\n",
+    "pd.set_option('display.latex.longtable',False)\n",
+    "pd.set_option('display.latex.escape',False)\n",
+    "\n",
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "ipub": {
+     "ignore": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame({'A': [np.random.randint(0, 100) for i in range(1000)],\n",
+    "                   'B': [np.random.randint(0, 100) for i in range(1000)],\n",
+    "                   'C': [np.random.randint(0, 100) for i in range(1000)]})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "ipub": {
+     "ignore": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from IPython.display import display\n",
+    "from IPython.display import display_latex\n",
+    "from IPython.display import display_markdown"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "ipub": {
+     "ignore": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def print_describe(var):\n",
+    "    \n",
+    "    kind = var.dtype.name\n",
+    "    data = []\n",
+    "    value = []\n",
+    "\n",
+    "    # Variable name\n",
+    "    data.append('Variable')\n",
+    "    value.append(var.name)\n",
+    "    # Data type\n",
+    "    data.append('Data type')\n",
+    "    value.append(var.dtype)\n",
+    "    # Data type name\n",
+    "    data.append('Data type name')\n",
+    "    value.append(var.dtype.name)\n",
+    "    # content\n",
+    "    data.append('Not NULL')\n",
+    "    value.append(var.count())\n",
+    "    # NULLs\n",
+    "    data.append('NULLs')\n",
+    "    value.append(len(var) - var.count())\n",
+    "\n",
+    "    df = pd.DataFrame({'Data': data, 'Value': value},\n",
+    "                      columns=['Data', 'Value'])\n",
+    "    display(df,metadata={'ipub':{\"table\":{\"caption\":False}}})\n",
+    "\n",
+    "    return"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ipub": {
+     "ignore": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def print_top_bottom(var):\n",
+    "    \n",
+    "    vc = var.value_counts()\n",
+    "    if len(vc) == 0:\n",
+    "        return\n",
+    "\n",
+    "    kind = var.dtype.name\n",
+    "    n = min(len(vc), 5)\n",
+    "    desc_n = range(n)\n",
+    "    desc_k = list(vc.keys())[:n]\n",
+    "    desc_v = list(vc)[:n]\n",
+    "\n",
+    "    vc = var.value_counts(ascending=True)\n",
+    "    asc_n = range(len(vc), 0, -1)[:n]\n",
+    "    asc_k = list(vc.keys())[:n]\n",
+    "    asc_v = list(vc)[:n]\n",
+    "\n",
+    "    df = pd.DataFrame({'TOP: Idx': desc_n, 'T.Data': desc_k, 'T.Count': desc_v,\n",
+    "                       'BOTTOM: Idx': asc_n, 'B.Data': asc_k, 'B.Count': asc_v},\n",
+    "                      columns=['TOP: Idx', 'T.Data', 'T.Count', \n",
+    "                               'BOTTOM: Idx', 'B.Data', 'B.Count'])\n",
+    "    display(df,metadata={'ipub':{\"table\":{\"caption\":'Column %s' % var.name}}})\n",
+    "    return"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "ipub": {
+     "ignore": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def print_bar_chart(var):\n",
+    "    \n",
+    "    ax = var.plot(figsize=(15, 5))\n",
+    "    display(ax.get_figure(),metadata={'ipub':{\"figure\":{\"caption\":'Column %s' % var.name}}})\n",
+    "    plt.close()\n",
+    "    return"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "ipub": {
+     "ignore": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def EDA_univariate(var):\n",
+    "    \n",
+    "    display_markdown('## Describe', raw=True)\n",
+    "    print_describe(var)\n",
+    "\n",
+    "    display_markdown('## Most/Least common', raw=True)\n",
+    "    print_top_bottom(var)\n",
+    "\n",
+    "    display_markdown('## Chart', raw=True)\n",
+    "    print_bar_chart(var)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Full Report\n",
+    "For each column will have:\n",
+    "\n",
+    "- Title (markdown)  \n",
+    "- Details (pandas table)  \n",
+    "- Most/Least common (pandas table)  \n",
+    "- Chart (image/matplotlib)  \n",
+    "- **LaTeX** (`\\newpage`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ipub": {
+     "equation": {
+      "label": "abc"
+     },
+     "figure": {
+      "caption": "Figure caption.",
+      "label": "figure:flabel",
+      "placement": "H"
+     },
+     "mkdown": true,
+     "table": {
+      "alternate": "gray!20",
+      "caption": "An example of a table created with pandas dataframe.",
+      "label": "table:example",
+      "placement": "H"
+     },
+     "text": {
+      "asfloat": true,
+      "caption": "",
+      "format": {
+       "basicstyle": "\\small"
+      },
+      "label": "text:example_text",
+      "placement": "H",
+      "widefigure": false
+     }
+    },
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "for col_name in df.columns[0:3]:\n",
+    "    \n",
+    "    display_markdown('# Column: %s' % col_name, raw=True)\n",
+    "\n",
+    "    col = df[col_name]\n",
+    "    kind = col.dtype.name\n",
+    "\n",
+    "    EDA_univariate(col)\n",
+    "    \n",
+    "    display_markdown('\\\\newpage', raw=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "Edit Metadata",
+  "hide_input": false,
+  "ipub": {
+   "listcode": false,
+   "listfigures": true,
+   "listtables": true,
+   "titlepage": {
+    "author": "The Author",
+    "email": "professional@author.com",
+    "institution": [
+     "Katra"
+    ],
+    "subtitle": "360",
+    "supervisors": [
+     "The Boss"
+    ],
+    "tagline": "A tagline for the report.",
+    "title": "Data Source"
+   },
+   "toc": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  },
+  "toc": {
+   "colors": {
+    "hover_highlight": "#DAA520",
+    "navigate_num": "#000000",
+    "navigate_text": "#333333",
+    "running_highlight": "#FF0000",
+    "selected_highlight": "#FFD700",
+    "sidebar_border": "#EEEEEE",
+    "wrapper_background": "#FFFFFF"
+   },
+   "moveMenuLeft": true,
+   "nav_menu": {
+    "height": "31px",
+    "width": "252px"
+   },
+   "navigate_menu": true,
+   "number_sections": true,
+   "sideBar": true,
+   "threshold": 4,
+   "toc_cell": false,
+   "toc_section_display": "block",
+   "toc_window_display": false,
+   "widenNotebook": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/ipypublish/__init__.py b/ipypublish/__init__.py
@@ -1 +1 @@
-__version__ = '0.6.0'
+__version__ = '0.6.1'
diff --git a/ipypublish/export_plugins/html_ipypublish_all.py b/ipypublish/export_plugins/html_ipypublish_all.py
@@ -21,6 +21,7 @@
 from ipypublish.preprocessors.latextags_to_html import LatexTagsToHTML
 from ipypublish.filters.replace_string import replace_string
 from ipypublish.preprocessors.latex_doc_defaults import MetaDefaults
+from ipypublish.preprocessors.split_outputs import SplitOutputs
 
 cell_defaults = {
   "ipub": {
@@ -51,7 +52,8 @@
 oformat = 'HTML'   
 config = {'TemplateExporter.filters':{'replace_string':replace_string},
           'Exporter.filters':{'replace_string':replace_string},
-          'Exporter.preprocessors':[MetaDefaults,LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
+          'Exporter.preprocessors':[MetaDefaults,SplitOutputs,LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
+          'SplitOutputs.split':True,
           'MetaDefaults.cell_defaults':cell_defaults,
           'MetaDefaults.nb_defaults':nb_defaults,
           'LatexCaptions.add_prefix':True}

diff --git a/ipypublish/export_plugins/html_ipypublish_main.py b/ipypublish/export_plugins/html_ipypublish_main.py
@@ -20,11 +20,13 @@
 from ipypublish.preprocessors.latex_doc_html import LatexDocHTML
 from ipypublish.preprocessors.latextags_to_html import LatexTagsToHTML
 from ipypublish.filters.replace_string import replace_string
+from ipypublish.preprocessors.split_outputs import SplitOutputs
 
 oformat = 'HTML'   
 config = {'TemplateExporter.filters':{'replace_string':replace_string},
           'Exporter.filters':{'replace_string':replace_string},
-          'Exporter.preprocessors':[LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
+          'Exporter.preprocessors':[SplitOutputs,LatexDocLinks,LatexDocHTML,LatexTagsToHTML,LatexCaptions],
+          'SplitOutputs.split':True,
           'LatexCaptions.add_prefix':True}
 
 template = create_tpl([