diff --git a/docs/project_info/faq.rst b/docs/project_info/faq.rst index 14084a40b1..98a31f8a8b 100644 --- a/docs/project_info/faq.rst +++ b/docs/project_info/faq.rst @@ -48,7 +48,7 @@ For debugging such cases, Snakemake provides the command line flag ``--debug-dag In addition, it is advisable to check whether certain intermediate files would be created by targetting them individually via the command line. -Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. +Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. This way, you can easily check rule by rule if it does what you expect. However, note that ``--allowed-rules`` is only meant for debugging. A workflow should always work fine without it. @@ -285,7 +285,7 @@ This will cause Snakemake to re-run all jobs of that rule and everything downstr How should Snakefiles be formatted? -------------------------------------- -To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. +To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. Python code gets formatted with `black `_ and Snakemake-specific blocks are formatted using similar principles (such as `PEP8 `_). @@ -484,6 +484,8 @@ Snakemake has a kind of "lazy" policy about added input files if their modificat Here, ``snakemake --list-input-changes`` returns the list of output files with changed input files, which is fed into ``-R`` to trigger a re-run. +It is worth mentioning that if the additional input files does not yet exist and can be found in outputs of another rules, Snakemake will correctly generate the missing dependencies and re-run the rule. + How do I trigger re-runs for rules with updated code or parameters? ------------------------------------------------------------------- diff --git a/snakemake/dag.py b/snakemake/dag.py index d768728bbf..1ef3c00df9 100755 --- a/snakemake/dag.py +++ b/snakemake/dag.py @@ -996,7 +996,12 @@ def update_needrun(job): output_mintime_ = output_mintime.get(job) if output_mintime_: updated_input = [ - f for f in job.input if f.exists and f.is_newer(output_mintime_) + f + for f in job.input + if ( + (f.exists and f.is_newer(output_mintime_)) + or (not f.exists and is_flagged(f, "missing")) + ) ] reason.updated_input.update(updated_input) if noinitreason and reason: diff --git a/snakemake/io.py b/snakemake/io.py index f6772cdb35..71da0c2a58 100755 --- a/snakemake/io.py +++ b/snakemake/io.py @@ -987,6 +987,13 @@ def ancient(value): return flag(value, "ancient") +def missing(value): + """ + Re run if new input files are missing; ie missing files will be generated first and then the considered rule is regenerated. + """ + return flag(value, "missing") + + def directory(value): """ A flag to specify that output is a directory, rather than a file or named pipe. diff --git a/snakemake/rules.py b/snakemake/rules.py index 45730db1c4..6649510d1e 100644 --- a/snakemake/rules.py +++ b/snakemake/rules.py @@ -20,6 +20,7 @@ _IOFile, protected, temp, + missing, dynamic, Namedlist, AnnotatedString, @@ -774,6 +775,7 @@ def _apply_wildcards( for name, item in olditems._allitems(): start = len(newitems) is_unpack = is_flagged(item, "unpack") + is_missing = is_flagged(item, "missing") _is_callable = is_callable(item) if _is_callable: @@ -831,6 +833,10 @@ def _apply_wildcards( if from_callable and apply_path_modifier and not incomplete: item_ = self.apply_path_modifier(item_, property=property) + # Forward the missing flag is necessary + if is_missing: + item_ = missing(item_) + concrete = concretize(item_, wildcards, _is_callable) newitems.append(concrete) if mapping is not None: diff --git a/snakemake/workflow.py b/snakemake/workflow.py index 12de6fd2d0..933b68eada 100644 --- a/snakemake/workflow.py +++ b/snakemake/workflow.py @@ -41,6 +41,7 @@ temp, temporary, ancient, + missing, directory, expand, dynamic, diff --git a/tests/common.py b/tests/common.py index ef1532eaf2..965ed1bbe2 100644 --- a/tests/common.py +++ b/tests/common.py @@ -98,6 +98,7 @@ def run( snakefile="Snakefile", subpath=None, no_tmpdir=False, + tmpdir=None, check_md5=True, check_results=True, cores=3, @@ -134,9 +135,10 @@ def run( ), "{} does not exist".format(results_dir) # If we need to further check results, we won't cleanup tmpdir - tmpdir = next(tempfile._get_candidate_names()) - tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir) - os.mkdir(tmpdir) + if not tmpdir: + tmpdir = next(tempfile._get_candidate_names()) + tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir) + os.mkdir(tmpdir) config = dict(config) diff --git a/tests/test_update_input/Snakefile b/tests/test_update_input/Snakefile new file mode 100644 index 0000000000..7c3fe530e8 --- /dev/null +++ b/tests/test_update_input/Snakefile @@ -0,0 +1,34 @@ +rule all: + input: "A1.txt", "A2.txt" + +rule A: + input: "A{index}.tmp" + output: "A{index}.txt" + shell: "cp {input} {output}" + + +rule A_TMP_1: + input: + "B-fred.txt" + + output: + temp("A1.tmp") + + run: + f = open(output[0], "w") + f.write(' '.join(input) + "\n") + +rule A_TMP_2: + input: + missing(lambda wildcards: [rules.B.output[0].format(name=name) + for name in config.get("names", "john").split(",")]) + output: + temp("A2.tmp") + + run: + f = open(output[0], "w") + f.write(' '.join(input) + "\n") + +rule B: + output: + touch("B-{name}.txt") diff --git a/tests/test_update_input/expected-results/A1.txt b/tests/test_update_input/expected-results/A1.txt new file mode 100644 index 0000000000..9342664223 --- /dev/null +++ b/tests/test_update_input/expected-results/A1.txt @@ -0,0 +1 @@ +B-fred.txt diff --git a/tests/test_update_input/expected-results/A2.txt b/tests/test_update_input/expected-results/A2.txt new file mode 100644 index 0000000000..55da7d815e --- /dev/null +++ b/tests/test_update_input/expected-results/A2.txt @@ -0,0 +1 @@ +B-john.txt B-doe.txt diff --git a/tests/test_update_input/expected-results/B-doe.txt b/tests/test_update_input/expected-results/B-doe.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_update_input/expected-results/B-fred.txt b/tests/test_update_input/expected-results/B-fred.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_update_input/expected-results/B-john.txt b/tests/test_update_input/expected-results/B-john.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/tests.py b/tests/tests.py index 329162eb28..5387faa99f 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1534,3 +1534,27 @@ def test_groupid_expand_cluster(): @skip_on_windows def test_service_jobs(): run(dpath("test_service_jobs"), check_md5=False) + + +def test_update_input(): + try: + # First run + tmpdir = run(dpath("test_update_input"), cleanup=False, check_results=False) + a1_txt = os.path.join(tmpdir, "A1.txt") + mtime_a1_txt = os.path.getmtime(a1_txt) + + # Prepare the update run with new values in the input function of rule A_TMP_2 + shutil.rmtree(os.path.join(tmpdir, "expected-results")) + shutil.rmtree(os.path.join(tmpdir, ".snakemake")) + run( + dpath("test_update_input"), + config={"names": "john,doe"}, + cores=1, + tmpdir=tmpdir, + cleanup=False, + ) + + # Check that A1.txt is left untouched. + assert os.path.getmtime(a1_txt) == mtime_a1_txt + finally: + shutil.rmtree(tmpdir)