From 47b5096ebbdd3d94a9c99b443064b1b0de389c64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20K=C3=B6ster?=
Date: Fri, 28 Jan 2022 17:32:32 +0100
Subject: [PATCH] fix: proper error message when defining cache eligibility for rules with multiple output files and no multiext declaration. (#1357)

* dbg message for cache fetching

* dbg

* handle invalid multi output rules marked as cacheable

* cleanup

* fmt

* add testcase
---
 snakemake/caching/__init__.py                          | 3 +++
 snakemake/caching/local.py                             | 6 ++++++
 snakemake/workflow.py                                  | 8 ++++++++
 tests/test_cache_multioutput/Snakefile                 | 7 +++++++
 tests/test_cache_multioutput/expected-results/.gitkeep | 0
 tests/tests.py                                         | 4 ++++
 6 files changed, 28 insertions(+)
 create mode 100644 tests/test_cache_multioutput/Snakefile
 create mode 100644 tests/test_cache_multioutput/expected-results/.gitkeep

diff --git a/snakemake/caching/__init__.py b/snakemake/caching/__init__.py
index 53356f028..b7d3049a0 100644
--- a/snakemake/caching/__init__.py
+++ b/snakemake/caching/__init__.py
@@ -47,6 +47,9 @@ def get_outputfiles(self, job: Job):
             )
             yield from ((f, f[prefix_len:]) for f in job.output)
         else:
+            assert (
+                len(job.output) == 1
+            ), "bug: multiple output files in cacheable job but multiext not used for declaring them"
             yield (job.output[0], "")
 
     def raise_write_error(self, entry, exception=None):
diff --git a/snakemake/caching/local.py b/snakemake/caching/local.py
index 471308b95..ad701c36b 100644
--- a/snakemake/caching/local.py
+++ b/snakemake/caching/local.py
@@ -95,6 +95,12 @@ def fetch(self, job: Job):
             if not cachefile.exists():
                 self.raise_cache_miss_exception(job)
 
+            logger.debug(
+                "Output file {} exists as {} in the cache.".format(
+                    outputfile, cachefile
+                )
+            )
+
             self.check_readable(cachefile)
             if cachefile.is_dir():
                 # For directories, create a new one and symlink each entry.
diff --git a/snakemake/workflow.py b/snakemake/workflow.py
index 3d5c6cbc5..a13f8c0af 100644
--- a/snakemake/workflow.py
+++ b/snakemake/workflow.py
@@ -1541,6 +1541,14 @@ def decorate(ruleinfo):
                 rule.is_handover = True
 
             if ruleinfo.cache is True:
+                if len(rule.output) > 1:
+                    if not rule.output[0].is_multiext:
+                        raise WorkflowError(
+                            "Rule is marked for between workflow caching but has multiple output files. "
+                            "This is only allowed if multiext() is used to declare them (see docs on between "
+                            "workflow caching).",
+                            rule=rule,
+                        )
                 if not self.enable_cache:
                     logger.warning(
                         "Workflow defines that rule {} is eligible for caching between workflows "
diff --git a/tests/test_cache_multioutput/Snakefile b/tests/test_cache_multioutput/Snakefile
new file mode 100644
index 000000000..a01fafe61
--- /dev/null
+++ b/tests/test_cache_multioutput/Snakefile
@@ -0,0 +1,7 @@
+rule a:
+    output:
+        "1.txt",
+        "2.txt",
+    cache: True
+    shell:
+        "touch {output}"
\ No newline at end of file
diff --git a/tests/test_cache_multioutput/expected-results/.gitkeep b/tests/test_cache_multioutput/expected-results/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/tests.py b/tests/tests.py
index c7da69b8c..b1e05d1bc 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -1397,3 +1397,7 @@ def test_modules_ruledeps_inheritance():
 @skip_on_windows
 def test_conda_named():
     run(dpath("test_conda_named"), use_conda=True)
+
+
+def test_cache_multioutput():
+    run(dpath("test_cache_multioutput"), shouldfail=True)