From 6771da4e6f7230df98429673cdd3c538b587df60 Mon Sep 17 00:00:00 2001 From: Christophe Clienti Date: Mon, 7 Mar 2022 14:17:16 +0100 Subject: [PATCH] fix: added missing input files in reason.updated_input in dag.py Let's consider a 'A' rule that takes 'N' inputs from a 'B' rule with different wildcards. The input function for the A rule requests output files from 'B'. If a first run has already generated the output of 'A' and 'B' and if the input function of 'A' requests new input files from B not yet generated, snakemake will neither generate missing 'B' output files nor regenerate the 'A' output. However the list-input-changes is able to list correctly the missing files. The commit allows to generate missing 'B' outputs and regenerate the 'A' output. --- docs/project_info/faq.rst | 6 ++++-- snakemake/dag.py | 4 +++- tests/test_update_input/Snakefile | 14 ++++++++++++++ tests/test_update_input/expected-results/A.txt | 1 + tests/test_update_input/expected-results/B-doe.txt | 0 .../test_update_input/expected-results/B-john.txt | 0 tests/tests.py | 5 +++++ 7 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 tests/test_update_input/Snakefile create mode 100644 tests/test_update_input/expected-results/A.txt create mode 100644 tests/test_update_input/expected-results/B-doe.txt create mode 100644 tests/test_update_input/expected-results/B-john.txt diff --git a/docs/project_info/faq.rst b/docs/project_info/faq.rst index 14084a40b1..98a31f8a8b 100644 --- a/docs/project_info/faq.rst +++ b/docs/project_info/faq.rst @@ -48,7 +48,7 @@ For debugging such cases, Snakemake provides the command line flag ``--debug-dag In addition, it is advisable to check whether certain intermediate files would be created by targetting them individually via the command line. -Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. 
+Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. This way, you can easily check rule by rule if it does what you expect. However, note that ``--allowed-rules`` is only meant for debugging. A workflow should always work fine without it. @@ -285,7 +285,7 @@ This will cause Snakemake to re-run all jobs of that rule and everything downstr How should Snakefiles be formatted? -------------------------------------- -To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. +To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. Python code gets formatted with `black `_ and Snakemake-specific blocks are formatted using similar principles (such as `PEP8 `_). @@ -484,6 +484,8 @@ Snakemake has a kind of "lazy" policy about added input files if their modificat Here, ``snakemake --list-input-changes`` returns the list of output files with changed input files, which is fed into ``-R`` to trigger a re-run. +It is worth mentioning that if the additional input files do not yet exist and can be found in the outputs of other rules, Snakemake will correctly generate the missing dependencies and re-run the rule. + How do I trigger re-runs for rules with updated code or parameters? 
------------------------------------------------------------------- diff --git a/snakemake/dag.py b/snakemake/dag.py index d768728bbf..b0fbae1bbc 100755 --- a/snakemake/dag.py +++ b/snakemake/dag.py @@ -996,7 +996,9 @@ def update_needrun(job): output_mintime_ = output_mintime.get(job) if output_mintime_: updated_input = [ - f for f in job.input if f.exists and f.is_newer(output_mintime_) + f + for f in job.input + if (f.exists and f.is_newer(output_mintime_)) or (not f.exists) ] reason.updated_input.update(updated_input) if noinitreason and reason: diff --git a/tests/test_update_input/Snakefile b/tests/test_update_input/Snakefile new file mode 100644 index 0000000000..6204ab1037 --- /dev/null +++ b/tests/test_update_input/Snakefile @@ -0,0 +1,14 @@ +rule all: + input: + lambda wildcards: [rules.B.output[0].format(name=name) + for name in config.get("names", "john").split(",")] + output: + "A.txt" + + run: + f = open(output[0], "w") + f.write(' '.join(input) + "\n") + +rule B: + output: + touch("B-{name}.txt") diff --git a/tests/test_update_input/expected-results/A.txt b/tests/test_update_input/expected-results/A.txt new file mode 100644 index 0000000000..55da7d815e --- /dev/null +++ b/tests/test_update_input/expected-results/A.txt @@ -0,0 +1 @@ +B-john.txt B-doe.txt diff --git a/tests/test_update_input/expected-results/B-doe.txt b/tests/test_update_input/expected-results/B-doe.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_update_input/expected-results/B-john.txt b/tests/test_update_input/expected-results/B-john.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/tests.py b/tests/tests.py index 63abd0edc4..0a9b47d388 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1523,3 +1523,8 @@ def test_groupid_expand_cluster(): @skip_on_windows def test_service_jobs(): run(dpath("test_service_jobs"), check_md5=False) + + +def test_update_input(): + run(dpath("test_update_input"), cleanup=False, check_results=False) + 
run(dpath("test_update_input"), config={"names": "john,doe"})