From e469c07f83418c4da76ad88b99f9125a3e308681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Tr=C3=B6ndle?= Date: Mon, 26 Jul 2021 11:20:46 +0200 Subject: [PATCH 1/3] Add rerun-params-changed commandline option This commandline option allows automatically rerunning all rules for which parameters have changed. Fixes #976. --- docs/project_info/faq.rst | 15 +++++++-------- snakemake/__init__.py | 13 ++++++++++++- snakemake/dag.py | 27 +++++++++++++++++++++++++++ snakemake/workflow.py | 2 ++ tests/tests.py | 21 +++++++++++++++++++++ 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/docs/project_info/faq.rst b/docs/project_info/faq.rst index 4d51c004b..2b32c1a8e 100644 --- a/docs/project_info/faq.rst +++ b/docs/project_info/faq.rst @@ -25,7 +25,7 @@ For debugging such cases, Snakemake provides the command line flag ``--debug-dag In addition, it is advisable to check whether certain intermediate files would be created by targetting them individually via the command line. -Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. +Finally, it is possible to constrain the rules that are considered for DAG creating via ``--allowed-rules``. This way, you can easily check rule by rule if it does what you expect. However, note that ``--allowed-rules`` is only meant for debugging. A workflow should always work fine without it. @@ -262,7 +262,7 @@ This will cause Snakemake to re-run all jobs of that rule and everything downstr How should Snakefiles be formatted? -------------------------------------- -To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. +To ensure readability and consistency, you can format Snakefiles with our tool `snakefmt `_. Python code gets formatted with `black `_ and Snakemake-specific blocks are formatted using similar principles (such as `PEP8 `_). @@ -469,16 +469,15 @@ Similar to the solution above, you can use .. 
code-block:: console - $ snakemake -n -R `snakemake --list-params-changes` - -and + $ snakemake -n -R `snakemake --list-code-changes` -.. code-block:: console +Again, the list command in backticks returns the list of output files with changes, which are fed into ``-R`` to trigger a re-run. +For updated parameters, you can use - $ snakemake -n -R `snakemake --list-code-changes` +.. code-block:: console -Again, the list commands in backticks return the list of output files with changes, which are fed into ``-R`` to trigger a re-run. + $ snakemake --rerun-params-changed How do I remove all files created by snakemake, i.e. like ``make clean`` diff --git a/snakemake/__init__.py b/snakemake/__init__.py index 4856e13d2..84fbf027b 100644 --- a/snakemake/__init__.py +++ b/snakemake/__init__.py @@ -99,6 +99,7 @@ def snakemake( cleanup_shadow=False, cleanup_scripts=True, force_incomplete=False, + force_params_changed=False, ignore_incomplete=False, list_version_changes=False, list_code_changes=False, @@ -235,6 +236,7 @@ def snakemake( cleanup_shadow (bool): just cleanup old shadow directories (default False) cleanup_scripts (bool): delete wrapper scripts used for execution (default True) force_incomplete (bool): force the re-creation of incomplete files (default False) + force_params_changed (bool): force the re-creation of files with updated parameters (default False) ignore_incomplete (bool): ignore incomplete files (default False) list_version_changes (bool): list output files with changed rule version (default False) list_code_changes (bool): list output files with changed rule code (default False) @@ -643,6 +645,7 @@ def snakemake( cleanup_shadow=cleanup_shadow, cleanup_scripts=cleanup_scripts, force_incomplete=force_incomplete, + force_params_changed=force_params_changed, ignore_incomplete=ignore_incomplete, latency_wait=latency_wait, verbose=verbose, @@ -742,6 +745,7 @@ def snakemake( ignore_ambiguity=ignore_ambiguity, stats=stats, force_incomplete=force_incomplete, + 
force_params_changed=force_params_changed, ignore_incomplete=ignore_incomplete, list_version_changes=list_version_changes, list_code_changes=list_code_changes, @@ -1051,7 +1055,7 @@ def get_argument_parser(profile=None): line options in YAML format. For example, '--cluster qsub' becomes 'cluster: qsub' in the YAML file. Profiles can be obtained from - https://github.com/snakemake-profiles. + https://github.com/snakemake-profiles. The profile can also be set via the environment variable $SNAKEMAKE_PROFILE. """.format( dirs.site_config_dir, dirs.user_config_dir @@ -1362,6 +1366,12 @@ def get_argument_parser(profile=None): action="store_true", help=("Re-run all " "jobs the output of which is recognized as incomplete."), ) + group_exec.add_argument( + "--rerun-params-changed", + "--rp", + action="store_true", + help=("Re-run all " "jobs whose parameters have changed."), + ) group_exec.add_argument( "--shadow-prefix", metavar="DIR", @@ -2808,6 +2818,7 @@ def open_browser(): cleanup_shadow=args.cleanup_shadow, cleanup_scripts=not args.skip_script_cleanup, force_incomplete=args.rerun_incomplete, + force_params_changed=args.rerun_params_changed, ignore_incomplete=args.ignore_incomplete, list_version_changes=args.list_version_changes, list_code_changes=args.list_code_changes, diff --git a/snakemake/dag.py b/snakemake/dag.py index 6035aad68..976ff6b0f 100755 --- a/snakemake/dag.py +++ b/snakemake/dag.py @@ -95,6 +95,7 @@ def __init__( omitrules=None, ignore_ambiguity=False, force_incomplete=False, + force_params_changed=False, ignore_incomplete=False, notemp=False, keep_remote_local=False, @@ -167,6 +168,7 @@ def __init__( self.omitrules.add(batch.rulename) self.force_incomplete = force_incomplete + self.force_params_changed = force_params_changed self.ignore_incomplete = ignore_incomplete self.periodic_wildcard_detector = PeriodicityDetector() @@ -191,6 +193,8 @@ def init(self, progress=False): self.cleanup() self.check_incomplete() + if self.force_params_changed: + 
self.rerun_params_changed() self.update_needrun(create_inventory=True) self.set_until_jobs() @@ -335,6 +339,14 @@ def check_incomplete(self): else: raise IncompleteFilesException(incomplete) + def rerun_params_changed(self): + """Force rerun of files for which parameters changed.""" + files = self.params_changed_files + if files: + logger.debug("Forcing files for which parameters changed:") + logger.debug("\t" + "\n\t".join(files)) + self.forcefiles.update(files) + def incomplete_external_jobid(self, job): """Return the external jobid of the job if it is marked as incomplete. @@ -452,6 +464,21 @@ def incomplete_files(self): ) ) + @property + def params_changed_files(self): + """Return list of files for which parameters changed.""" + return list( + chain( + *( + job.output + for job in filter( + self.workflow.persistence.params_changed, + filterfalse(self.needrun, self.jobs), + ) + ) + ) + ) + @property def newversion_files(self): """Return list of files where the current version is newer than the diff --git a/snakemake/workflow.py b/snakemake/workflow.py index b5a33dfd5..642675c3e 100644 --- a/snakemake/workflow.py +++ b/snakemake/workflow.py @@ -566,6 +566,7 @@ def execute( container_image=None, stats=None, force_incomplete=False, + force_params_changed=False, ignore_incomplete=False, list_version_changes=False, list_code_changes=False, @@ -699,6 +700,7 @@ def files(items): omitrules=omitrules, ignore_ambiguity=ignore_ambiguity, force_incomplete=force_incomplete, + force_params_changed=force_params_changed, ignore_incomplete=ignore_incomplete or printdag or printrulegraph diff --git a/tests/tests.py b/tests/tests.py index 3a75e3823..88511a239 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1286,3 +1286,24 @@ def test_touch_pipeline_with_temp_dir(): def test_all_temp(): run(dpath("test_all_temp"), all_temp=True) + + +def test_no_rerun_params_changed_without_commandline_flag(): + tmpdir = run(dpath("test_rerun_params_changed"), config={"param1": 5}, 
cleanup=False) + run(tmpdir, config={"param1": 3}, cleanup=True) + shutil.rmtree(tmpdir) + + +def test_rerun_params_changed_with_commandline_flag(): + tmpdir = run(dpath("test_rerun_params_changed"), config={"param1": 5}, cleanup=False) + # change expected result from 5 to 3 + path_to_expected_result = os.path.join(tmpdir, "expected-results/param.txt") + with open(path_to_expected_result, "w") as f_res: + f_res.write("3") + # rerun and expect 3 + run( + tmpdir, + shellcmd="snakemake --rerun-params-changed --cores 1 --config param1=3", + cleanup=True + ) + shutil.rmtree(tmpdir) From 082fe2b923a7d5841b9b59a7b23611af504d5f95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Tr=C3=B6ndle?= Date: Thu, 12 Aug 2021 15:05:02 +0200 Subject: [PATCH 2/3] Add test folder for --rerun-params-changed --- tests/test_rerun_params_changed/Snakefile | 8 ++++++++ .../test_rerun_params_changed/expected-results/param.txt | 1 + 2 files changed, 9 insertions(+) create mode 100644 tests/test_rerun_params_changed/Snakefile create mode 100644 tests/test_rerun_params_changed/expected-results/param.txt diff --git a/tests/test_rerun_params_changed/Snakefile b/tests/test_rerun_params_changed/Snakefile new file mode 100644 index 000000000..c7518e415 --- /dev/null +++ b/tests/test_rerun_params_changed/Snakefile @@ -0,0 +1,8 @@ +rule write_param_to_file: + params: + param1 = config["param1"] + output: + "param.txt" + run: + with open("param.txt", "w") as f_param: + f_param.writelines([str(params.param1)]) diff --git a/tests/test_rerun_params_changed/expected-results/param.txt b/tests/test_rerun_params_changed/expected-results/param.txt new file mode 100644 index 000000000..7813681f5 --- /dev/null +++ b/tests/test_rerun_params_changed/expected-results/param.txt @@ -0,0 +1 @@ +5 \ No newline at end of file From 22cc0eda2f41a8700279d336012dc9e6849f461a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Tr=C3=B6ndle?= Date: Wed, 18 Aug 2021 17:07:22 +0200 Subject: [PATCH 3/3] Update tests by applying black 
--- tests/tests.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index 88511a239..130c843f3 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1289,13 +1289,17 @@ def test_all_temp(): def test_no_rerun_params_changed_without_commandline_flag(): - tmpdir = run(dpath("test_rerun_params_changed"), config={"param1": 5}, cleanup=False) + tmpdir = run( + dpath("test_rerun_params_changed"), config={"param1": 5}, cleanup=False + ) run(tmpdir, config={"param1": 3}, cleanup=True) shutil.rmtree(tmpdir) def test_rerun_params_changed_with_commandline_flag(): - tmpdir = run(dpath("test_rerun_params_changed"), config={"param1": 5}, cleanup=False) + tmpdir = run( + dpath("test_rerun_params_changed"), config={"param1": 5}, cleanup=False + ) # change expected result from 5 to 3 path_to_expected_result = os.path.join(tmpdir, "expected-results/param.txt") with open(path_to_expected_result, "w") as f_res: @@ -1304,6 +1308,6 @@ def test_rerun_params_changed_with_commandline_flag(): run( tmpdir, shellcmd="snakemake --rerun-params-changed --cores 1 --config param1=3", - cleanup=True + cleanup=True, ) shutil.rmtree(tmpdir)