snakemake · cclienti · Mar 7, 2022
@@ -48,7 +48,7 @@ For debugging such cases, Snakemake provides the command line flag ``--debug-dag
 
 In addition, it is advisable to check whether certain intermediate files would be created by targetting them individually via the command line.
 
-Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. 
+Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``.
 This way, you can easily check rule by rule if it does what you expect.
 However, note that ``--allowed-rules`` is only meant for debugging.
 A workflow should always work fine without it.
@@ -285,7 +285,7 @@ This will cause Snakemake to re-run all jobs of that rule and everything downstr
 How should Snakefiles be formatted?
 --------------------------------------
 
-To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt <https://github.com/snakemake/snakefmt>`_. 
+To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt <https://github.com/snakemake/snakefmt>`_.
 
 Python code gets formatted with `black <https://github.com/psf/black>`_ and Snakemake-specific blocks are formatted using similar principles (such as `PEP8 <https://www.python.org/dev/peps/pep-0008/>`_).
 
@@ -484,6 +484,8 @@ Snakemake has a kind of "lazy" policy about added input files if their modificat
 
 Here, ``snakemake --list-input-changes`` returns the list of output files with changed input files, which is fed into ``-R`` to trigger a re-run.
 
+It is worth mentioning that if the additional input files does not yet exist and can be found in outputs of another rules, the input files can be marked as `missing` to generate the missing dependencies and re-run the rule (see :ref:`snakefiles-missing-input`).
+
 
 How do I trigger re-runs for rules with updated code or parameters?
 -------------------------------------------------------------------

@@ -996,7 +996,12 @@ def update_needrun(job):
                 output_mintime_ = output_mintime.get(job)
                 if output_mintime_:
                     updated_input = [
-                        f for f in job.input if f.exists and f.is_newer(output_mintime_)
+                        f
+                        for f in job.input
+                        if (
+                            (f.exists and f.is_newer(output_mintime_))
+                            or (not f.exists and is_flagged(f, "missing"))
+                        )
                     ]
                     reason.updated_input.update(updated_input)
             if noinitreason and reason:

@@ -987,6 +987,13 @@ def ancient(value):
     return flag(value, "ancient")
 
 
+def missing(value):
+    """
+    Re run if new input files are missing; ie missing files will be generated first and then the considered rule is regenerated.
+    """
+    return flag(value, "missing")
+
+
 def directory(value):
     """
     A flag to specify that output is a directory, rather than a file or named pipe.

@@ -20,6 +20,7 @@
     _IOFile,
     protected,
     temp,
+    missing,
     dynamic,
     Namedlist,
     AnnotatedString,
@@ -774,6 +775,7 @@ def _apply_wildcards(
         for name, item in olditems._allitems():
             start = len(newitems)
             is_unpack = is_flagged(item, "unpack")
+            is_missing = is_flagged(item, "missing")
             _is_callable = is_callable(item)
 
             if _is_callable:
@@ -831,6 +833,10 @@ def _apply_wildcards(
                     if from_callable and apply_path_modifier and not incomplete:
                         item_ = self.apply_path_modifier(item_, property=property)
 
+                    # Forward the missing flag is necessary
+                    if is_missing:
+                        item_ = missing(item_)
+
                     concrete = concretize(item_, wildcards, _is_callable)
                     newitems.append(concrete)
                     if mapping is not None:

@@ -41,6 +41,7 @@
     temp,
     temporary,
     ancient,
+    missing,
     directory,
     expand,
     dynamic,

@@ -98,6 +98,7 @@ def run(
     snakefile="Snakefile",
     subpath=None,
     no_tmpdir=False,
+    tmpdir=None,
     check_md5=True,
     check_results=True,
     cores=3,
@@ -134,9 +135,10 @@ def run(
     ), "{} does not exist".format(results_dir)
 
     # If we need to further check results, we won't cleanup tmpdir
-    tmpdir = next(tempfile._get_candidate_names())
-    tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir)
-    os.mkdir(tmpdir)
+    if not tmpdir:
+        tmpdir = next(tempfile._get_candidate_names())
+        tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir)
+        os.mkdir(tmpdir)
 
     config = dict(config)
 

@@ -0,0 +1,34 @@
+rule all:
+    input: "A1.txt", "A2.txt"
+
+rule A:
+    input: "A{index}.tmp"
+    output: "A{index}.txt"
+    shell: "cp {input} {output}"
+
+
+rule A_TMP_1:
+    input:
+        "B-fred.txt"
+
+    output:
+        temp("A1.tmp")
+
+    run:
+        f = open(output[0], "w")
+        f.write(' '.join(input) + "\n")
+
+rule A_TMP_2:
+    input:
+        missing(lambda wildcards: [rules.B.output[0].format(name=name)
+                                   for name in config.get("names", "john").split(",")])
+    output:
+        temp("A2.tmp")
+
+    run:
+        f = open(output[0], "w")
+        f.write(' '.join(input) + "\n")
+
+rule B:
+    output:
+        touch("B-{name}.txt")
@@ -0,0 +1 @@
+B-fred.txt
@@ -0,0 +1 @@
+B-john.txt B-doe.txt
@@ -1534,3 +1534,35 @@ def test_groupid_expand_cluster():
 @skip_on_windows
 def test_service_jobs():
     run(dpath("test_service_jobs"), check_md5=False)
+
+
+def test_update_input():
+    try:
+        # First run
+        tmpdir = run(dpath("test_update_input"), cleanup=False, check_results=False)
+        a1_txt = os.path.join(tmpdir, "A1.txt")
+        a2_txt = os.path.join(tmpdir, "A2.txt")
+        john_txt = os.path.join(tmpdir, "B-john.txt")
+        mtime_a1_txt = os.path.getmtime(a1_txt)
+        mtime_a2_txt = os.path.getmtime(a2_txt)
+        mtime_john_txt = os.path.getmtime(john_txt)
+
+        # Prepare the update run with new values in the input function of rule A_TMP_2
+        shutil.rmtree(os.path.join(tmpdir, "expected-results"))
+        shutil.rmtree(os.path.join(tmpdir, ".snakemake"))
+        run(
+            dpath("test_update_input"),
+            config={"names": "john,doe"},
+            cores=1,
+            tmpdir=tmpdir,
+            cleanup=False,
+        )
+
+        # Check that A1.txt is left untouched.
+        assert os.path.getmtime(a1_txt) == mtime_a1_txt
+        # Check that A2.txt has been regenerated
+        assert os.path.getmtime(a2_txt) > mtime_a2_txt
+        # Check that john.txt has left untouched
+        assert os.path.getmtime(john_txt) == mtime_john_txt
+    finally:
+        shutil.rmtree(tmpdir)