From f568d544eae2ada036b950f21d8b405add940fa1 Mon Sep 17 00:00:00 2001 From: Christophe Clienti Date: Mon, 7 Mar 2022 14:17:16 +0100 Subject: [PATCH] feat: trigger rerun when new files are yield by an input function Added the "missing" input flags to trigger rerun when new files are yield by an input function. Such flag is forwarded to all elements yield by the input function. --- docs/project_info/faq.rst | 6 ++-- snakemake/dag.py | 7 +++- snakemake/io.py | 7 ++++ snakemake/rules.py | 6 ++++ snakemake/workflow.py | 1 + tests/common.py | 8 +++-- tests/test_update_input/Snakefile | 34 +++++++++++++++++++ .../test_update_input/expected-results/A1.txt | 1 + .../test_update_input/expected-results/A2.txt | 1 + .../expected-results/B-doe.txt | 0 .../expected-results/B-fred.txt | 0 .../expected-results/B-john.txt | 0 tests/tests.py | 24 +++++++++++++ 13 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 tests/test_update_input/Snakefile create mode 100644 tests/test_update_input/expected-results/A1.txt create mode 100644 tests/test_update_input/expected-results/A2.txt create mode 100644 tests/test_update_input/expected-results/B-doe.txt create mode 100644 tests/test_update_input/expected-results/B-fred.txt create mode 100644 tests/test_update_input/expected-results/B-john.txt diff --git a/docs/project_info/faq.rst b/docs/project_info/faq.rst index 14084a40b1..98a31f8a8b 100644 --- a/docs/project_info/faq.rst +++ b/docs/project_info/faq.rst @@ -48,7 +48,7 @@ For debugging such cases, Snakemake provides the command line flag ``--debug-dag In addition, it is advisable to check whether certain intermediate files would be created by targetting them individually via the command line. -Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. +Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. This way, you can easily check rule by rule if it does what you expect. However, note that ``--allowed-rules`` is only meant for debugging. A workflow should always work fine without it. @@ -285,7 +285,7 @@ This will cause Snakemake to re-run all jobs of that rule and everything downstr How should Snakefiles be formatted? -------------------------------------- -To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. +To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. Python code gets formatted with `black `_ and Snakemake-specific blocks are formatted using similar principles (such as `PEP8 `_). @@ -484,6 +484,8 @@ Snakemake has a kind of "lazy" policy about added input files if their modificat Here, ``snakemake --list-input-changes`` returns the list of output files with changed input files, which is fed into ``-R`` to trigger a re-run. +It is worth mentioning that if the additional input files does not yet exist and can be found in outputs of another rules, Snakemake will correctly generate the missing dependencies and re-run the rule. + How do I trigger re-runs for rules with updated code or parameters? ------------------------------------------------------------------- diff --git a/snakemake/dag.py b/snakemake/dag.py index d768728bbf..1ef3c00df9 100755 --- a/snakemake/dag.py +++ b/snakemake/dag.py @@ -996,7 +996,12 @@ def update_needrun(job): output_mintime_ = output_mintime.get(job) if output_mintime_: updated_input = [ - f for f in job.input if f.exists and f.is_newer(output_mintime_) + f + for f in job.input + if ( + (f.exists and f.is_newer(output_mintime_)) + or (not f.exists and is_flagged(f, "missing")) + ) ] reason.updated_input.update(updated_input) if noinitreason and reason: diff --git a/snakemake/io.py b/snakemake/io.py index f6772cdb35..71da0c2a58 100755 --- a/snakemake/io.py +++ b/snakemake/io.py @@ -987,6 +987,13 @@ def ancient(value): return flag(value, "ancient") +def missing(value): + """ + Re run if new input files are missing; ie missing files will be generated first and then the considered rule is regenerated. + """ + return flag(value, "missing") + + def directory(value): """ A flag to specify that output is a directory, rather than a file or named pipe. diff --git a/snakemake/rules.py b/snakemake/rules.py index 45730db1c4..6649510d1e 100644 --- a/snakemake/rules.py +++ b/snakemake/rules.py @@ -20,6 +20,7 @@ _IOFile, protected, temp, + missing, dynamic, Namedlist, AnnotatedString, @@ -774,6 +775,7 @@ def _apply_wildcards( for name, item in olditems._allitems(): start = len(newitems) is_unpack = is_flagged(item, "unpack") + is_missing = is_flagged(item, "missing") _is_callable = is_callable(item) if _is_callable: @@ -831,6 +833,10 @@ def _apply_wildcards( if from_callable and apply_path_modifier and not incomplete: item_ = self.apply_path_modifier(item_, property=property) + # Forward the missing flag is necessary + if is_missing: + item_ = missing(item_) + concrete = concretize(item_, wildcards, _is_callable) newitems.append(concrete) if mapping is not None: diff --git a/snakemake/workflow.py b/snakemake/workflow.py index 12de6fd2d0..933b68eada 100644 --- a/snakemake/workflow.py +++ b/snakemake/workflow.py @@ -41,6 +41,7 @@ temp, temporary, ancient, + missing, directory, expand, dynamic, diff --git a/tests/common.py b/tests/common.py index ef1532eaf2..965ed1bbe2 100644 --- a/tests/common.py +++ b/tests/common.py @@ -98,6 +98,7 @@ def run( snakefile="Snakefile", subpath=None, no_tmpdir=False, + tmpdir=None, check_md5=True, check_results=True, cores=3, @@ -134,9 +135,10 @@ def run( ), "{} does not exist".format(results_dir) # If we need to further check results, we won't cleanup tmpdir - tmpdir = next(tempfile._get_candidate_names()) - tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir) - os.mkdir(tmpdir) + if not tmpdir: + tmpdir = next(tempfile._get_candidate_names()) + tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir) + os.mkdir(tmpdir) config = dict(config) diff --git a/tests/test_update_input/Snakefile b/tests/test_update_input/Snakefile new file mode 100644 index 0000000000..7c3fe530e8 --- /dev/null +++ b/tests/test_update_input/Snakefile @@ -0,0 +1,34 @@ +rule all: + input: "A1.txt", "A2.txt" + +rule A: + input: "A{index}.tmp" + output: "A{index}.txt" + shell: "cp {input} {output}" + + +rule A_TMP_1: + input: + "B-fred.txt" + + output: + temp("A1.tmp") + + run: + f = open(output[0], "w") + f.write(' '.join(input) + "\n") + +rule A_TMP_2: + input: + missing(lambda wildcards: [rules.B.output[0].format(name=name) + for name in config.get("names", "john").split(",")]) + output: + temp("A2.tmp") + + run: + f = open(output[0], "w") + f.write(' '.join(input) + "\n") + +rule B: + output: + touch("B-{name}.txt") diff --git a/tests/test_update_input/expected-results/A1.txt b/tests/test_update_input/expected-results/A1.txt new file mode 100644 index 0000000000..9342664223 --- /dev/null +++ b/tests/test_update_input/expected-results/A1.txt @@ -0,0 +1 @@ +B-fred.txt diff --git a/tests/test_update_input/expected-results/A2.txt b/tests/test_update_input/expected-results/A2.txt new file mode 100644 index 0000000000..55da7d815e --- /dev/null +++ b/tests/test_update_input/expected-results/A2.txt @@ -0,0 +1 @@ +B-john.txt B-doe.txt diff --git a/tests/test_update_input/expected-results/B-doe.txt b/tests/test_update_input/expected-results/B-doe.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_update_input/expected-results/B-fred.txt b/tests/test_update_input/expected-results/B-fred.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_update_input/expected-results/B-john.txt b/tests/test_update_input/expected-results/B-john.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/tests.py b/tests/tests.py index 329162eb28..5387faa99f 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1534,3 +1534,27 @@ def test_groupid_expand_cluster(): @skip_on_windows def test_service_jobs(): run(dpath("test_service_jobs"), check_md5=False) + + +def test_update_input(): + try: + # First run + tmpdir = run(dpath("test_update_input"), cleanup=False, check_results=False) + a1_txt = os.path.join(tmpdir, "A1.txt") + mtime_a1_txt = os.path.getmtime(a1_txt) + + # Prepare the update run with new values in the input function of rule A_TMP_2 + shutil.rmtree(os.path.join(tmpdir, "expected-results")) + shutil.rmtree(os.path.join(tmpdir, ".snakemake")) + run( + dpath("test_update_input"), + config={"names": "john,doe"}, + cores=1, + tmpdir=tmpdir, + cleanup=False, + ) + + # Check that A1.txt is left untouched. + assert os.path.getmtime(a1_txt) == mtime_a1_txt + finally: + shutil.rmtree(tmpdir)