Skip to content

Commit

Permalink
feat: improved reports: more interactive and modern interface, abilit…
Browse files Browse the repository at this point in the history
…y to define a label based representation of files (#1470)
  • Loading branch information
johanneskoester committed Mar 13, 2022
1 parent 6b1d09c commit d09df0c
Show file tree
Hide file tree
Showing 47 changed files with 108,454 additions and 2,674 deletions.
47 changes: 45 additions & 2 deletions docs/snakefiles/reporting.rst
Expand Up @@ -52,7 +52,11 @@ Consider the following example:
rule d:
output:
report(directory("testdir"), patterns=["{name}.txt"], caption="report/somedata.rst", category="Step 3")
report(
directory("testdir"),
patterns=["{name}.txt"],
caption="report/somedata.rst",
category="Step 3")
shell:
"mkdir {output}; for i in 1 2 3; do echo $i > {output}/$i.txt; done"
Expand Down Expand Up @@ -89,8 +93,44 @@ This works as follows:
echo \"alert('test')\" > test/js/test.js
"""
Defining file labels
~~~~~~~~~~~~~~~~~~~~~

Moreover, in every ``.rst`` document, you can link to
In addition to category and subcategory, it is possible to define a dictionary of labels for each report item.
If labels are defined, the actual filename is hidden in the report; instead, a table is displayed that has the label keys as columns and, in the row for the file, the respective label values.
This can lead to less technical reports that abstract away the fact that the results of the analysis are actually files.
Consider the following modification of rule ``b`` from above:

.. code-block:: python
rule b:
input:
expand("{model}.{i}.out", i=range(10))
output:
report(
"fig2.png",
caption="report/fig2.rst",
category="Step 2",
subcategory="{model}",
labels={
"model": "{model}",
"figure": "some plot"
}
)
shell:
"sleep `shuf -i 1-3 -n 1`; cp data/fig2.png {output}"
Determining category, subcategory, and labels dynamically via functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Similar to input file and parameter definitions (see :ref:`snakefiles-input_functions`), ``category``, ``subcategory``, and ``labels`` can also be specified by pointing to a function, which is expected to return a string.


Linking between items
~~~~~~~~~~~~~~~~~~~~~

In every ``.rst`` document, you can link to

* the **Workflow** panel (with ``Rules_``),
* the **Statistics** panel (with ``Statistics_``),
Expand All @@ -99,6 +139,9 @@ Moreover, in every ``.rst`` document, you can link to

For details about the hyperlink mechanism of restructured text see `here <https://docutils.sourceforge.io/docs/user/rst/quickref.html#hyperlink-targets>`_.

Rendering reports
~~~~~~~~~~~~~~~~~

To create the report simply run

.. code-block:: bash
Expand Down
6 changes: 4 additions & 2 deletions setup.py
Expand Up @@ -41,6 +41,8 @@
"snakemake",
"snakemake.remote",
"snakemake.report",
"snakemake.report.template",
"snakemake.report.data",
"snakemake.common",
"snakemake.caching",
"snakemake.deployment",
Expand All @@ -56,7 +58,7 @@
"snakemake-bash-completion = snakemake:bash_completion",
]
},
package_data={"": ["*.css", "*.sh", "*.html", "*.jinja2"]},
package_data={"": ["*.css", "*.sh", "*.html", "*.jinja2", "*.js", "*.svg"]},
install_requires=[
"wrapt",
"requests",
Expand All @@ -81,7 +83,7 @@
"retry",
],
extras_require={
"reports": ["jinja2", "networkx", "pygments", "pygraphviz"],
"reports": ["jinja2", "pygments"],
"messaging": ["slacker"],
"google-cloud": [
"oauth2client",
Expand Down
10 changes: 10 additions & 0 deletions snakemake/common/__init__.py
Expand Up @@ -257,3 +257,13 @@ class Gather:
"""A namespace for gather to allow items to be accessed via dot notation."""

pass


def get_function_params(func):
    """Return the parameters declared by the callable ``func``.

    Arguments
    func -- Callable whose signature shall be inspected.

    The result is the ``parameters`` mapping of ``inspect.signature(func)``,
    i.e. parameter names mapped to ``inspect.Parameter`` objects in
    declaration order.
    """
    return inspect.signature(func).parameters


def get_input_function_aux_params(func, candidate_params):
    """Select those candidate parameters that ``func`` actually declares.

    Arguments
    func -- Callable whose signature decides which candidates are kept.
    candidate_params -- Mapping of parameter name to value.

    Returns a new dict containing the items of ``candidate_params`` whose key
    is the name of a parameter of ``func``; all other items are dropped.
    """
    func_params = get_function_params(func)

    return {k: v for k, v in candidate_params.items() if k in func_params}
22 changes: 22 additions & 0 deletions snakemake/dag.py
Expand Up @@ -2235,6 +2235,28 @@ def stats(self):
yield tabulate(rows, headers="keys")
yield ""

def toposorted(self, jobs=None, inherit_pipe_dependencies=False):
    """Topologically sort jobs according to their dependencies in this DAG.

    Arguments
    jobs -- Jobs to sort. If None, all jobs of the DAG (``self.jobs``) are used.
    inherit_pipe_dependencies -- If True, a job that depends on another job
        via a file flagged as "pipe" additionally inherits the dependencies
        of that producer job.

    Returns the result of ``toposort.toposort`` applied to the mapping of each
    job to the set of its dependencies (restricted to ``jobs``).
    """
    from toposort import toposort

    if jobs is None:
        jobs = set(self.jobs)

    def get_dependencies(job):
        # self.dependencies[job] maps each dependency to the files it provides.
        for dep, files in self.dependencies[job].items():
            # NOTE(review): nesting reconstructed from a listing without
            # indentation -- confirm that the pipe check belongs inside the
            # ``dep in jobs`` branch.
            if dep in jobs:
                yield dep
                if inherit_pipe_dependencies and any(
                    is_flagged(f, "pipe") for f in files
                ):
                    # In case of a pipe, inherit the dependencies of the producer,
                    # such that the two jobs end up on the same toposort level.
                    # This is important because they are executed simultaneously.
                    yield from get_dependencies(dep)

    dag = {job: set(get_dependencies(job)) for job in jobs}

    return toposort(dag)

def get_outputs_with_changes(self, change_type, include_needrun=True):
is_changed = lambda job: (
getattr(self.workflow.persistence, f"{change_type}_changed")(job)
Expand Down
19 changes: 14 additions & 5 deletions snakemake/io.py
Expand Up @@ -1080,12 +1080,19 @@ def checkpoint_target(value):


ReportObject = collections.namedtuple(
"ReportObject", ["caption", "category", "subcategory", "patterns", "htmlindex"]
"ReportObject",
["caption", "category", "subcategory", "labels", "patterns", "htmlindex"],
)


def report(
value, caption=None, category=None, subcategory=None, patterns=[], htmlindex=None
value,
caption=None,
category=None,
subcategory=None,
labels=None,
patterns=[],
htmlindex=None,
):
"""Flag output file or directory as to be included into reports.
Expand All @@ -1094,15 +1101,17 @@ def report(
Arguments
value -- File or directory.
caption -- Path to a .rst file with a textual description of the result.
category -- Name of the category in which the result should be displayed in the report.
pattern -- Wildcard pattern for selecting files if a directory is given (this is used as
category -- Name of the (optional) category in which the result should be displayed in the report.
subcategory -- Name of the (optional) subcategory
labels -- Dict of strings (may contain wildcard expressions) that will be used as columns when displaying result tables
patterns -- Wildcard patterns for selecting files if a directory is given (this is used as
input for snakemake.io.glob_wildcards). Pattern shall not include the path to the
directory itself.
"""
return flag(
value,
"report",
ReportObject(caption, category, subcategory, patterns, htmlindex),
ReportObject(caption, category, subcategory, labels, patterns, htmlindex),
)


Expand Down
17 changes: 3 additions & 14 deletions snakemake/jobs.py
Expand Up @@ -1182,20 +1182,9 @@ def finalize(self):
from toposort import toposort

if self.toposorted is None:

def get_dependencies(job):
for dep, files in self.dag.dependencies[job].items():
if dep in self.jobs:
yield dep
if any(is_flagged(f, "pipe") for f in files):
# In case of a pipe, inherit the dependencies of the producer,
# such that the two jobs end up on the same toposort level.
# This is important because they are executed simulataneously.
yield from get_dependencies(dep)

dag = {job: set(get_dependencies(job)) for job in self.jobs}

self.toposorted = list(toposort(dag))
self.toposorted = list(
self.dag.toposorted(jobs=self.jobs, inherit_pipe_dependencies=True)
)

@property
def all_products(self):
Expand Down

0 comments on commit d09df0c

Please sign in to comment.