feat: improved reports: more interactive and modern interface, ability to define a label based representation of files (#1470)
snakemake · Mar 13, 2022 · d09df0c · d09df0c
1 parent 6b1d09c
commit d09df0c
Show file tree

Hide file tree

Showing 47 changed files with 108,454 additions and 2,674 deletions.
diff --git a/docs/snakefiles/reporting.rst b/docs/snakefiles/reporting.rst
@@ -52,7 +52,11 @@ Consider the following example:
 
   rule d:
       output:
-          report(directory("testdir"), patterns=["{name}.txt"], caption="report/somedata.rst", category="Step 3")
+          report(
+              directory("testdir"), 
+              patterns=["{name}.txt"], 
+              caption="report/somedata.rst", 
+              category="Step 3")
       shell:
           "mkdir {output}; for i in 1 2 3; do echo $i > {output}/$i.txt; done"
 
@@ -89,8 +93,44 @@ This works as follows:
             echo \"alert('test')\" > test/js/test.js
             """
 
+Defining file labels
+~~~~~~~~~~~~~~~~~~~~~
 
-Moreover, in every ``.rst`` document, you can link to
+In addition to category and subcategory, it is possible to define a dictionary of labels for each report item.
+When labels are defined, the actual filename is hidden in the report; instead, a table is displayed with the label keys as columns and the label values of each file in its respective row.
+This can lead to less technical reports that abstract away the fact that the results of the analysis are actually files.
+Consider the following modification of rule ``b`` from above:
+
+.. code-block:: python
+
+    rule b:
+      input:
+          expand("{model}.{i}.out", i=range(10))
+      output:
+          report(
+              "fig2.png", 
+              caption="report/fig2.rst", 
+              category="Step 2", 
+              subcategory="{model}",
+              labels={
+                  "model": "{model}",
+                  "figure": "some plot"
+              }
+          )
+      shell:
+          "sleep `shuf -i 1-3 -n 1`; cp data/fig2.png {output}"
+
+
+Determining category, subcategory, and labels dynamically via functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Similar to input file and parameter definitions (see :ref:`snakefiles-input_functions`), ``category``, ``subcategory``, and ``labels`` can each be specified by pointing to a function. The function is expected to return a string (or, in the case of ``labels``, a dictionary of strings as described above).
+
+
+Linking between items
+~~~~~~~~~~~~~~~~~~~~~
+
+In every ``.rst`` document, you can link to
 
 * the **Workflow** panel (with ``Rules_``),
 * the **Statistics** panel (with ``Statistics_``),
@@ -99,6 +139,9 @@ Moreover, in every ``.rst`` document, you can link to
 
 For details about the hyperlink mechanism of restructured text see `here <https://docutils.sourceforge.io/docs/user/rst/quickref.html#hyperlink-targets>`_.
 
+Rendering reports
+~~~~~~~~~~~~~~~~~
+
 To create the report simply run
 
 .. code-block:: bash

diff --git a/setup.py b/setup.py
@@ -41,6 +41,8 @@
         "snakemake",
         "snakemake.remote",
         "snakemake.report",
+        "snakemake.report.template",
+        "snakemake.report.data",
         "snakemake.common",
         "snakemake.caching",
         "snakemake.deployment",
@@ -56,7 +58,7 @@
             "snakemake-bash-completion = snakemake:bash_completion",
         ]
     },
-    package_data={"": ["*.css", "*.sh", "*.html", "*.jinja2"]},
+    package_data={"": ["*.css", "*.sh", "*.html", "*.jinja2", "*.js", "*.svg"]},
     install_requires=[
         "wrapt",
         "requests",
@@ -81,7 +83,7 @@
         "retry",
     ],
     extras_require={
-        "reports": ["jinja2", "networkx", "pygments", "pygraphviz"],
+        "reports": ["jinja2", "pygments"],
         "messaging": ["slacker"],
         "google-cloud": [
             "oauth2client",

diff --git a/snakemake/common/__init__.py b/snakemake/common/__init__.py
@@ -257,3 +257,13 @@ class Gather:
     """A namespace for gather to allow items to be accessed via dot notation."""
 
     pass
+
+
+def get_function_params(func):
+    """Return the parameters of ``func`` as given by ``inspect.signature``."""
+    return inspect.signature(func).parameters
+
+
+def get_input_function_aux_params(func, candidate_params):
+    """Select the entries of candidate_params that func accepts by name.
+
+    Arguments
+    func -- Callable whose signature is inspected.
+    candidate_params -- Dict mapping parameter names to values.
+
+    Returns a new dict holding only those items of candidate_params whose key
+    is the name of a parameter in the signature of func.
+    """
+    func_params = get_function_params(func)
+
+    return {k: v for k, v in candidate_params.items() if k in func_params}
diff --git a/snakemake/dag.py b/snakemake/dag.py
@@ -2235,6 +2235,28 @@ def stats(self):
         yield tabulate(rows, headers="keys")
         yield ""
 
+    def toposorted(self, jobs=None, inherit_pipe_dependencies=False):
+        """Topologically sort jobs according to self.dependencies.
+
+        Arguments
+        jobs -- Jobs to sort. If None, set(self.jobs) is used. Dependencies
+                that are not contained in jobs are ignored.
+        inherit_pipe_dependencies -- If True, a job that depends on another job
+                via a file flagged as "pipe" additionally inherits the
+                dependencies of that producing job.
+
+        Returns the result of toposort.toposort applied to the restricted
+        dependency graph (an iterable of job sets, one per toposort level).
+        """
+        # Imported locally, so toposort is only needed when this method is called.
+        from toposort import toposort
+
+        if jobs is None:
+            jobs = set(self.jobs)
+
+        def get_dependencies(job):
+            # Yield the dependencies of job, restricted to the given jobs.
+            for dep, files in self.dependencies[job].items():
+                if dep in jobs:
+                    yield dep
+                    if inherit_pipe_dependencies and any(
+                        is_flagged(f, "pipe") for f in files
+                    ):
+                        # In case of a pipe, inherit the dependencies of the producer,
+                        # such that the two jobs end up on the same toposort level.
+                        # This is important because they are executed simultaneously.
+                        yield from get_dependencies(dep)
+
+        # Map each job to the set of jobs it depends on (the input format of toposort).
+        dag = {job: set(get_dependencies(job)) for job in jobs}
+
+        return toposort(dag)
+
     def get_outputs_with_changes(self, change_type, include_needrun=True):
         is_changed = lambda job: (
             getattr(self.workflow.persistence, f"{change_type}_changed")(job)

diff --git a/snakemake/io.py b/snakemake/io.py
@@ -1080,12 +1080,19 @@ def checkpoint_target(value):
 
 
 ReportObject = collections.namedtuple(
-    "ReportObject", ["caption", "category", "subcategory", "patterns", "htmlindex"]
+    "ReportObject",
+    ["caption", "category", "subcategory", "labels", "patterns", "htmlindex"],
 )
 
 
 def report(
-    value, caption=None, category=None, subcategory=None, patterns=[], htmlindex=None
+    value,
+    caption=None,
+    category=None,
+    subcategory=None,
+    labels=None,
+    patterns=[],
+    htmlindex=None,
 ):
     """Flag output file or directory as to be included into reports.
 
@@ -1094,15 +1101,17 @@ def report(
     Arguments
     value -- File or directory.
     caption -- Path to a .rst file with a textual description of the result.
-    category -- Name of the category in which the result should be displayed in the report.
-    pattern -- Wildcard pattern for selecting files if a directory is given (this is used as
+    category -- Name of the (optional) category in which the result should be displayed in the report.
+    subcategory -- Name of the (optional) subcategory
+    labels -- Dict of strings (may contain wildcard expressions) that will be used as columns when displaying result tables
+    patterns -- Wildcard patterns for selecting files if a directory is given (this is used as
                input for snakemake.io.glob_wildcards). Pattern shall not include the path to the
                directory itself.
     """
     return flag(
         value,
         "report",
-        ReportObject(caption, category, subcategory, patterns, htmlindex),
+        ReportObject(caption, category, subcategory, labels, patterns, htmlindex),
     )
 
 

diff --git a/snakemake/jobs.py b/snakemake/jobs.py
@@ -1182,20 +1182,9 @@ def finalize(self):
         from toposort import toposort
 
         if self.toposorted is None:
-
-            def get_dependencies(job):
-                for dep, files in self.dag.dependencies[job].items():
-                    if dep in self.jobs:
-                        yield dep
-                        if any(is_flagged(f, "pipe") for f in files):
-                            # In case of a pipe, inherit the dependencies of the producer,
-                            # such that the two jobs end up on the same toposort level.
-                            # This is important because they are executed simulataneously.
-                            yield from get_dependencies(dep)
-
-            dag = {job: set(get_dependencies(job)) for job in self.jobs}
-
-            self.toposorted = list(toposort(dag))
+            self.toposorted = list(
+                self.dag.toposorted(jobs=self.jobs, inherit_pipe_dependencies=True)
+            )
 
     @property
     def all_products(self):