From 638ec1a983741cd7ba8faaf1a9dc76ae43d012e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Sat, 29 Jan 2022 15:45:07 +0100 Subject: [PATCH] feat: adding default_target directive for declaring default target rules that are not the first rule in the workflow. (#1358) * feat: adding default_target directive for declaring default target rules that are not the first rule in the workflow. * fmt * fix check * Update tests.py --- docs/snakefiles/deployment.rst | 34 ++++++++++++++---- docs/snakefiles/rules.rst | 18 ++++++++-- snakemake/__init__.py | 4 ++- snakemake/dag.py | 2 +- snakemake/modules.py | 2 +- snakemake/parser.py | 7 ++++ snakemake/ruleinfo.py | 1 + snakemake/workflow.py | 36 ++++++++++++++----- tests/test_default_target/Snakefile | 11 ++++++ .../expected-results/1.txt | 1 + .../expected-results/2.txt | 1 + tests/tests.py | 5 +++ 12 files changed, 102 insertions(+), 20 deletions(-) create mode 100644 tests/test_default_target/Snakefile create mode 100644 tests/test_default_target/expected-results/1.txt create mode 100644 tests/test_default_target/expected-results/2.txt diff --git a/docs/snakefiles/deployment.rst b/docs/snakefiles/deployment.rst index f6856ce45..e3dac0337 100644 --- a/docs/snakefiles/deployment.rst +++ b/docs/snakefiles/deployment.rst @@ -103,7 +103,7 @@ For example, we can easily add another rule to extend the given workflow: github("snakemake-workflows/dna-seq-gatk-variant-calling", path="workflow/Snakefile", tag="v2.0.1") config: config - use rule * from dna_seq + use rule * from dna_seq as dna_seq_* # easily extend the workflow rule plot_vafs: @@ -114,7 +114,19 @@ For example, we can easily add another rule to extend the given workflow: notebook: "notebooks/plot-vafs.py.ipynb" -Moreover, it is possible to further extend the workflow with other modules, thereby generating an integrative analysis. + # Define a new default target that collects both the targets from the dna_seq module as well as + # the new plot. 
+ rule all: + input: + rules.dna_seq_all.input, + "results/plots/vafs.svg", + default_target: True + +Above, we have added a prefix to all rule names of the dna_seq module, such that there is no name clash with the added rules (``as dna_seq_*`` in the ``use rule`` statement). +In addition, we have added a new rule ``all``, defining the default target in case the workflow is executed (as usual) without any specific target files or rules. +The new target rule collects all input files of the rule ``all`` from the dna_seq workflow as well as the new plot. + +It is possible to further extend the workflow with other modules, thereby generating an integrative analysis. Here, let us assume that we want to conduct another kind of analysis, say RNA-seq, using a different external workflow. We can extend above example in the following way: @@ -149,10 +161,20 @@ We can extend above example in the following way: use rule * from rna_seq as rna_seq_* -Above, several things have changed. First, we have added another module ``rna_seq``. -Second, we have added a prefix to all rule names of both modules (``dna_seq_*`` and ``rna_seq_*`` in the ``use rule`` statements) in order to avoid rule name clashes. -Third, we have added a prefix to all non-absolute input and output file names of both modules (``prefix: "dna-seq"`` and ``prefix: "rna-seq"``) in order to avoid file name clashes. -Finally, we provide the config of the two modules via two separate sections in the common config file (``config["dna-seq"]`` and ``config["rna-seq"]``). + + # Define a new default target that collects all the targets from the dna_seq and rna_seq modules. + rule all: + input: + rules.dna_seq_all.input, + rules.rna_seq_all.input, + default_target: True + +Above, several things have changed. + +* First, we have added another module ``rna_seq``. 
+* Second, we have added a prefix to all non-absolute input and output file names of both modules (``prefix: "dna-seq"`` and ``prefix: "rna-seq"``) in order to avoid file name clashes. +* Third, we have added a default target rule that collects the default targets of both the module ``dna_seq`` and the module ``rna_seq``. +* Finally, we provide the config of the two modules via two separate sections in the common config file (``config["dna-seq"]`` and ``config["rna-seq"]``). ---------------------------------- Uploading workflows to WorkflowHub diff --git a/docs/snakefiles/rules.rst b/docs/snakefiles/rules.rst index 13b89dd77..9a4f4a469 100644 --- a/docs/snakefiles/rules.rst +++ b/docs/snakefiles/rules.rst @@ -246,11 +246,25 @@ By default snakemake executes the first rule in the snakefile. This gives rise t .. code-block:: python rule all: - input: ["{dataset}/file.A.txt".format(dataset=dataset) for dataset in DATASETS] + input: + expand("{dataset}/file.A.txt", dataset=DATASETS) + + +Here, for each dataset in a Python list ``DATASETS`` defined before, the file ``{dataset}/file.A.txt`` is requested. +In this example, Snakemake recognizes automatically that these can be created by multiple applications of the rule ``complex_conversion`` shown above. +It is possible to override the default behavior of using the first rule as the default target by explicitly marking a rule as being the default target via the ``default_target`` directive: -Here, for each dataset in a python list ``DATASETS`` defined before, the file ``{dataset}/file.A.txt`` is requested. In this example, Snakemake recognizes automatically that these can be created by multiple applications of the rule ``complex_conversion`` shown above. +.. code-block:: python + + rule xy: + input: + expand("{dataset}/file.A.txt", dataset=DATASETS) + default_target: True +Regardless of where this rule appears in the Snakefile, it will be the default target. 
+Usually, it is still recommended to keep the default target rule (and in fact all other rules that could act as optional targets) at the top of the file, so that it can be found easily. +The ``default_target`` directive becomes particularly useful when :ref:`combining several pre-existing workflows <use_with_modules>`. .. _snakefiles-threads: diff --git a/snakemake/__init__.py b/snakemake/__init__.py index 373c51805..57be04d75 100644 --- a/snakemake/__init__.py +++ b/snakemake/__init__.py @@ -591,7 +591,9 @@ def snakemake( success = True workflow.include( - snakefile, overwrite_first_rule=True, print_compilation=print_compilation + snakefile, + overwrite_default_target=True, + print_compilation=print_compilation, ) workflow.check() diff --git a/snakemake/dag.py b/snakemake/dag.py index a50efc41c..20d50bb7b 100755 --- a/snakemake/dag.py +++ b/snakemake/dag.py @@ -622,7 +622,7 @@ def unneeded_files(): and not job.is_checkpoint and ( job not in self.targetjobs - or job.rule.name == self.workflow.first_rule + or job.rule.name == self.workflow.default_target ) ): tempfiles = ( diff --git a/snakemake/modules.py b/snakemake/modules.py index 37164897a..be34db2d4 100644 --- a/snakemake/modules.py +++ b/snakemake/modules.py @@ -86,7 +86,7 @@ def use_rules(self, rules=None, name_modifier=None, ruleinfo=None): prefix=self.prefix, replace_wrapper_tag=self.get_wrapper_tag(), ): - self.workflow.include(snakefile, overwrite_first_rule=True) + self.workflow.include(snakefile, overwrite_default_target=True) def get_snakefile(self): if self.meta_wrapper: diff --git a/snakemake/parser.py b/snakemake/parser.py index 9e0163926..73d5e60b5 100644 --- a/snakemake/parser.py +++ b/snakemake/parser.py @@ -484,6 +484,12 @@ def keyword(self): return "cache_rule" +class DefaultTarget(RuleKeywordState): + @property + def keyword(self): + return "default_target_rule" + + class Handover(RuleKeywordState): pass @@ -673,6 +679,7 @@ def args(self): group=Group, cache=Cache, handover=Handover, + 
default_target=DefaultTarget, ) diff --git a/snakemake/ruleinfo.py b/snakemake/ruleinfo.py index 64a21c7b5..f9305b1bb 100644 --- a/snakemake/ruleinfo.py +++ b/snakemake/ruleinfo.py @@ -37,6 +37,7 @@ def __init__(self, func=None): self.cache = False self.path_modifier = None self.handover = False + self.default_target = False def apply_modifier( self, modifier, prefix_replacables={"input", "output", "log", "benchmark"} diff --git a/snakemake/workflow.py b/snakemake/workflow.py index a13f8c0af..67ee74aeb 100644 --- a/snakemake/workflow.py +++ b/snakemake/workflow.py @@ -155,7 +155,7 @@ def __init__( self.global_resources["_nodes"] = nodes self._rules = OrderedDict() - self.first_rule = None + self.default_target = None self._workdir = None self.overwrite_workdir = overwrite_workdir self.workdir_init = os.path.abspath(os.curdir) @@ -466,8 +466,8 @@ def add_rule( self._rules[rule.name] = rule if not is_overwrite: self.rule_count += 1 - if not self.first_rule: - self.first_rule = rule.name + if not self.default_target: + self.default_target = rule.name return name def is_rule(self, name): @@ -644,7 +644,9 @@ def files(items): return map(relpath, filterfalse(self.is_rule, items)) if not targets: - targets = [self.first_rule] if self.first_rule is not None else list() + targets = ( + [self.default_target] if self.default_target is not None else list() + ) if prioritytargets is None: prioritytargets = list() @@ -1148,7 +1150,7 @@ def containerize(self): def include( self, snakefile, - overwrite_first_rule=False, + overwrite_default_target=False, print_compilation=False, overwrite_shellcmd=None, ): @@ -1164,7 +1166,7 @@ def include( self.included.append(snakefile) self.included_stack.append(snakefile) - first_rule = self.first_rule + default_target = self.default_target code, linemap, rulecount = parse( snakefile, self, @@ -1185,8 +1187,8 @@ def include( exec(compile(code, snakefile.get_path_or_uri(), "exec"), self.globals) - if not overwrite_first_rule: - self.first_rule = 
first_rule + if not overwrite_default_target: + self.default_target = default_target self.included_stack.pop() def onstart(self, func): @@ -1558,11 +1560,20 @@ def decorate(ruleinfo): self.cache_rules.add(rule.name) elif not (ruleinfo.cache is False): raise WorkflowError( - "Invalid argument for 'cache:' directive. Only true allowed. " + "Invalid argument for 'cache:' directive. Only True allowed. " "To deactivate caching, remove directive.", rule=rule, ) + if ruleinfo.default_target is True: + self.default_target = rule.name + elif not (ruleinfo.default_target is False): + raise WorkflowError( + "Invalid argument for 'default_target:' directive. Only True allowed. " + "Do not use the directive for rules that shall not be the default target. ", + rule=rule, + ) + ruleinfo.func.__name__ = "__{}".format(rule.name) self.globals[ruleinfo.func.__name__] = ruleinfo.func @@ -1623,6 +1634,13 @@ def decorate(ruleinfo): return decorate + def default_target_rule(self, value): + def decorate(ruleinfo): + ruleinfo.default_target = value + return ruleinfo + + return decorate + def message(self, message): def decorate(ruleinfo): ruleinfo.message = message diff --git a/tests/test_default_target/Snakefile b/tests/test_default_target/Snakefile new file mode 100644 index 000000000..6f76a62b8 --- /dev/null +++ b/tests/test_default_target/Snakefile @@ -0,0 +1,11 @@ +rule a: + output: + "{sample}.txt" + shell: + "echo test > {output}" + + +rule b: + input: + expand("{sample}.txt", sample=[1, 2]) + default_target: True \ No newline at end of file diff --git a/tests/test_default_target/expected-results/1.txt b/tests/test_default_target/expected-results/1.txt new file mode 100644 index 000000000..9daeafb98 --- /dev/null +++ b/tests/test_default_target/expected-results/1.txt @@ -0,0 +1 @@ +test diff --git a/tests/test_default_target/expected-results/2.txt b/tests/test_default_target/expected-results/2.txt new file mode 100644 index 000000000..9daeafb98 --- /dev/null +++ 
b/tests/test_default_target/expected-results/2.txt @@ -0,0 +1 @@ +test diff --git a/tests/tests.py b/tests/tests.py index b1e05d1bc..2e54c4c2f 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1399,5 +1399,10 @@ def test_conda_named(): run(dpath("test_conda_named"), use_conda=True) +@skip_on_windows +def test_default_target(): + run(dpath("test_default_target")) + + def test_cache_multioutput(): run(dpath("test_cache_multioutput"), shouldfail=True)