Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: trigger rerun when new files are yield by an input function #1462

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/project_info/faq.rst
Expand Up @@ -48,7 +48,7 @@ For debugging such cases, Snakemake provides the command line flag ``--debug-dag

In addition, it is advisable to check whether certain intermediate files would be created by targetting them individually via the command line.

Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``.
Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``.
This way, you can easily check rule by rule if it does what you expect.
However, note that ``--allowed-rules`` is only meant for debugging.
A workflow should always work fine without it.
Expand Down Expand Up @@ -285,7 +285,7 @@ This will cause Snakemake to re-run all jobs of that rule and everything downstr
How should Snakefiles be formatted?
--------------------------------------

To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt <https://github.com/snakemake/snakefmt>`_.
To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt <https://github.com/snakemake/snakefmt>`_.

Python code gets formatted with `black <https://github.com/psf/black>`_ and Snakemake-specific blocks are formatted using similar principles (such as `PEP8 <https://www.python.org/dev/peps/pep-0008/>`_).

Expand Down Expand Up @@ -484,6 +484,8 @@ Snakemake has a kind of "lazy" policy about added input files if their modificat

Here, ``snakemake --list-input-changes`` returns the list of output files with changed input files, which is fed into ``-R`` to trigger a re-run.

It is worth mentioning that if the additional input files does not yet exist and can be found in outputs of another rules, the input files can be marked as `missing` to generate the missing dependencies and re-run the rule (see :ref:`snakefiles-missing-input`).


How do I trigger re-runs for rules with updated code or parameters?
-------------------------------------------------------------------
Expand Down
123 changes: 89 additions & 34 deletions docs/snakefiles/rules.rst

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion snakemake/dag.py
Expand Up @@ -996,7 +996,12 @@ def update_needrun(job):
output_mintime_ = output_mintime.get(job)
if output_mintime_:
updated_input = [
f for f in job.input if f.exists and f.is_newer(output_mintime_)
f
for f in job.input
if (
(f.exists and f.is_newer(output_mintime_))
or (not f.exists and is_flagged(f, "missing"))
)
]
reason.updated_input.update(updated_input)
if noinitreason and reason:
Expand Down
7 changes: 7 additions & 0 deletions snakemake/io.py
Expand Up @@ -987,6 +987,13 @@ def ancient(value):
return flag(value, "ancient")


def missing(value):
"""
Re run if new input files are missing; ie missing files will be generated first and then the considered rule is regenerated.
"""
return flag(value, "missing")


def directory(value):
"""
A flag to specify that output is a directory, rather than a file or named pipe.
Expand Down
6 changes: 6 additions & 0 deletions snakemake/rules.py
Expand Up @@ -20,6 +20,7 @@
_IOFile,
protected,
temp,
missing,
dynamic,
Namedlist,
AnnotatedString,
Expand Down Expand Up @@ -774,6 +775,7 @@ def _apply_wildcards(
for name, item in olditems._allitems():
start = len(newitems)
is_unpack = is_flagged(item, "unpack")
is_missing = is_flagged(item, "missing")
_is_callable = is_callable(item)

if _is_callable:
Expand Down Expand Up @@ -831,6 +833,10 @@ def _apply_wildcards(
if from_callable and apply_path_modifier and not incomplete:
item_ = self.apply_path_modifier(item_, property=property)

# Forward the missing flag is necessary
if is_missing:
item_ = missing(item_)

concrete = concretize(item_, wildcards, _is_callable)
newitems.append(concrete)
if mapping is not None:
Expand Down
1 change: 1 addition & 0 deletions snakemake/workflow.py
Expand Up @@ -41,6 +41,7 @@
temp,
temporary,
ancient,
missing,
directory,
expand,
dynamic,
Expand Down
8 changes: 5 additions & 3 deletions tests/common.py
Expand Up @@ -98,6 +98,7 @@ def run(
snakefile="Snakefile",
subpath=None,
no_tmpdir=False,
tmpdir=None,
check_md5=True,
check_results=True,
cores=3,
Expand Down Expand Up @@ -134,9 +135,10 @@ def run(
), "{} does not exist".format(results_dir)

# If we need to further check results, we won't cleanup tmpdir
tmpdir = next(tempfile._get_candidate_names())
tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir)
os.mkdir(tmpdir)
if not tmpdir:
tmpdir = next(tempfile._get_candidate_names())
tmpdir = os.path.join(tempfile.gettempdir(), "snakemake-%s" % tmpdir)
os.mkdir(tmpdir)

config = dict(config)

Expand Down
34 changes: 34 additions & 0 deletions tests/test_update_input/Snakefile
@@ -0,0 +1,34 @@
rule all:
input: "A1.txt", "A2.txt"

rule A:
input: "A{index}.tmp"
output: "A{index}.txt"
shell: "cp {input} {output}"


rule A_TMP_1:
input:
"B-fred.txt"

output:
temp("A1.tmp")

run:
f = open(output[0], "w")
f.write(' '.join(input) + "\n")

rule A_TMP_2:
input:
missing(lambda wildcards: [rules.B.output[0].format(name=name)
for name in config.get("names", "john").split(",")])
output:
temp("A2.tmp")

run:
f = open(output[0], "w")
f.write(' '.join(input) + "\n")

rule B:
output:
touch("B-{name}.txt")
1 change: 1 addition & 0 deletions tests/test_update_input/expected-results/A1.txt
@@ -0,0 +1 @@
B-fred.txt
1 change: 1 addition & 0 deletions tests/test_update_input/expected-results/A2.txt
@@ -0,0 +1 @@
B-john.txt B-doe.txt
Empty file.
Empty file.
Empty file.
32 changes: 32 additions & 0 deletions tests/tests.py
Expand Up @@ -1534,3 +1534,35 @@ def test_groupid_expand_cluster():
@skip_on_windows
def test_service_jobs():
run(dpath("test_service_jobs"), check_md5=False)


def test_update_input():
try:
# First run
tmpdir = run(dpath("test_update_input"), cleanup=False, check_results=False)
a1_txt = os.path.join(tmpdir, "A1.txt")
a2_txt = os.path.join(tmpdir, "A2.txt")
john_txt = os.path.join(tmpdir, "B-john.txt")
mtime_a1_txt = os.path.getmtime(a1_txt)
mtime_a2_txt = os.path.getmtime(a2_txt)
mtime_john_txt = os.path.getmtime(john_txt)

# Prepare the update run with new values in the input function of rule A_TMP_2
shutil.rmtree(os.path.join(tmpdir, "expected-results"))
shutil.rmtree(os.path.join(tmpdir, ".snakemake"))
run(
dpath("test_update_input"),
config={"names": "john,doe"},
cores=1,
tmpdir=tmpdir,
cleanup=False,
)

# Check that A1.txt is left untouched.
assert os.path.getmtime(a1_txt) == mtime_a1_txt
# Check that A2.txt has been regenerated
assert os.path.getmtime(a2_txt) > mtime_a2_txt
# Check that john.txt has left untouched
assert os.path.getmtime(john_txt) == mtime_john_txt
finally:
shutil.rmtree(tmpdir)