From 0ac3b3806c065d0ec3a551a5992faf30ddcf0576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Fri, 16 Jul 2021 19:18:53 +0200 Subject: [PATCH] feat: Allow to mark all output files as temp with --all-temp (#1097) * mark all output as temp * move logic into rule * fixes --- snakemake/__init__.py | 14 +++++++++-- snakemake/rules.py | 7 ++++++ snakemake/workflow.py | 2 ++ tests/test_all_temp/Snakefile | 23 +++++++++++++++++++ tests/test_all_temp/expected-results/.gitkeep | 0 tests/tests.py | 4 ++++ 6 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 tests/test_all_temp/Snakefile create mode 100644 tests/test_all_temp/expected-results/.gitkeep diff --git a/snakemake/__init__.py b/snakemake/__init__.py index c7119ae35..4856e13d2 100644 --- a/snakemake/__init__.py +++ b/snakemake/__init__.py @@ -116,6 +116,7 @@ def snakemake( print_compilation=False, debug=False, notemp=False, + all_temp=False, keep_remote_local=False, nodeps=False, keep_target_files=False, @@ -242,8 +243,8 @@ def snakemake( list_untracked (bool): list files in the workdir that are not used in the workflow (default False) summary (bool): list summary of all output files and their status (default False) archive (str): archive workflow into the given tarball - delete_all_output (bool) remove all files generated by the workflow (default False) - delete_temp_output (bool) remove all temporary files generated by the workflow (default False) + delete_all_output (bool): remove all files generated by the workflow (default False) + delete_temp_output (bool): remove all temporary files generated by the workflow (default False) latency_wait (int): how many seconds to wait for an output file to appear after the execution of a job, e.g. to handle filesystem latency (default 3) wait_for_files (list): wait for given files to be present before executing the workflow list_resources (bool): list resources used in the workflow (default False) @@ -585,6 +586,7 @@ def snakemake( scheduler_solver_path=scheduler_solver_path, conda_base_path=conda_base_path, check_envvars=not lint, # for linting, we do not need to check whether requested envvars exist + all_temp=all_temp, ) success = True @@ -645,6 +647,7 @@ def snakemake( latency_wait=latency_wait, verbose=verbose, notemp=notemp, + all_temp=all_temp, keep_remote_local=keep_remote_local, nodeps=nodeps, jobscript=jobscript, @@ -1857,6 +1860,12 @@ def get_argument_parser(profile=None): "a part of the workflow, since temp() would lead to deletion of " "probably needed files by other parts of the workflow.", ) + group_behavior.add_argument( + "--all-temp", + action="store_true", + help="Mark all output files as temp files. This can be useful for CI testing, " + "in order to save space.", + ) group_behavior.add_argument( "--keep-remote", action="store_true", @@ -2815,6 +2824,7 @@ def open_browser(): debug=args.debug, jobscript=args.jobscript, notemp=args.notemp, + all_temp=args.all_temp, keep_remote_local=args.keep_remote, greediness=args.greediness, no_hooks=args.no_hooks, diff --git a/snakemake/rules.py b/snakemake/rules.py index 775ef6c7d..39b05e27b 100644 --- a/snakemake/rules.py +++ b/snakemake/rules.py @@ -56,6 +56,7 @@ ) from snakemake.logging import logger from snakemake.common import Mode, ON_WINDOWS, lazy_property, TBDString +import snakemake.io class Rule: @@ -543,8 +544,14 @@ def _set_inoutput_item(self, item, output=False, name=None): self ) ) + + if self.workflow.all_temp and output: + # mark as temp if all output files shall be marked as temp + item = snakemake.io.flag(item, "temp") + # record rule if this is an output file output _item = IOFile(item, rule=self) + if is_flagged(item, "temp"): if output: self.temp_output.add(_item) diff --git a/snakemake/workflow.py b/snakemake/workflow.py index af689a01f..b5a33dfd5 100644 --- a/snakemake/workflow.py +++ b/snakemake/workflow.py @@ -136,6 +136,7 @@ def __init__( conda_base_path=None, check_envvars=True, max_threads=None, + all_temp=False, ): """ Create the controller. @@ -215,6 +216,7 @@ def __init__( self._conda_base_path = conda_base_path self.check_envvars = check_envvars self.max_threads = max_threads + self.all_temp = all_temp _globals = globals() _globals["workflow"] = self diff --git a/tests/test_all_temp/Snakefile b/tests/test_all_temp/Snakefile new file mode 100644 index 000000000..46461234b --- /dev/null +++ b/tests/test_all_temp/Snakefile @@ -0,0 +1,23 @@ +rule all: + input: + "test2.txt" + run: + import os + if os.path.exists("test1.txt"): + raise ValueError("test1.txt still present!") + + +rule a: + output: + "test1.txt" + shell: + "touch {output}" + + +rule b: + input: + "test1.txt" + output: + "test2.txt" + shell: + "touch {output}" \ No newline at end of file diff --git a/tests/test_all_temp/expected-results/.gitkeep b/tests/test_all_temp/expected-results/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tests.py b/tests/tests.py index ddb4bdd29..3a75e3823 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1282,3 +1282,7 @@ def test_github_issue1069(): def test_touch_pipeline_with_temp_dir(): # Issue #1028 run(dpath("test_touch_pipeline_with_temp_dir"), forceall=True, touch=True) + + +def test_all_temp(): + run(dpath("test_all_temp"), all_temp=True)