From 8ed0c8cb453b6ebf6df138391a0681ffc8442e09 Mon Sep 17 00:00:00 2001
From: Arya Massarat <23412689+aryarm@users.noreply.github.com>
Date: Sun, 20 Feb 2022 23:57:10 -0800
Subject: [PATCH] fix: issue with duplicated prefix for checkpoints on cloud (#1294)

* create tests for issue #574 to reproduce a problem with the use of checkpoints in cloud environments

* do not add default remote prefix to files in checkpoints
---
Reviewer notes (free text between the "---" separator and the diffstat; not
part of the commit message):

* snakemake/rules.py: in _apply_wildcards, the apply_path_modifier() call is
  now skipped when `incomplete` is true; previously it depended only on the
  `apply_path_modifier` flag. The commit message ties this to the default
  remote prefix being added to files in checkpoints.
* tests/test_cloud_checkpoints_issue574/Snakefile: checkpoint `copy` copies
  one GS remote object to landsat-data.txt; rule `pack` obtains its input via
  checkpoints.copy.get().output[0] and compresses it with bzip2.
* tests/test_google_lifesciences.py: the new test runs that workflow with
  use_conda=True, google_lifesciences=True, google_lifesciences_cache=False
  and default_remote_prefix="<bucket_name>/<storage_prefix>", and calls
  cleanup_google_storage() in a `finally` block.
* NOTE(review): the added Snakefile imports `os` but no added line uses it,
  and config.json is added without a `configfile:` directive in the
  Snakefile; presumably leftovers from the Snakefile it was adapted from -
  confirm.
* NOTE(review): bucket_name is built from tempfile._get_candidate_names(),
  an underscore-prefixed helper of the tempfile module; whether the rest of
  this test module uses the same idiom is not visible here.
* NOTE(review): this copy of the patch had lost its line breaks and leading
  whitespace; the indentation below was restored by convention. Verify with
  `git apply --check` against blobs 21138c129 (snakemake/rules.py) and
  3d3b815c0 (tests/test_google_lifesciences.py) before applying.

 snakemake/rules.py                            |  2 +-
 .../test_cloud_checkpoints_issue574/Snakefile | 32 +++++++++++++++++++
 .../config.json                               |  1 +
 tests/test_cloud_checkpoints_issue574/env.yml |  4 +++
 .../expected-results/.gitkeep                 |  0
 tests/test_google_lifesciences.py             | 19 +++++++++++
 6 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_cloud_checkpoints_issue574/Snakefile
 create mode 100644 tests/test_cloud_checkpoints_issue574/config.json
 create mode 100644 tests/test_cloud_checkpoints_issue574/env.yml
 create mode 100644 tests/test_cloud_checkpoints_issue574/expected-results/.gitkeep

diff --git a/snakemake/rules.py b/snakemake/rules.py
index 21138c129..a125cee99 100644
--- a/snakemake/rules.py
+++ b/snakemake/rules.py
@@ -767,7 +767,7 @@ def _apply_wildcards(
                     is_unpack=is_unpack,
                     **aux_params
                 )
-                if apply_path_modifier:
+                if apply_path_modifier and not incomplete:
                     item = self.apply_path_modifier(item, property=property)
 
             if is_unpack and not incomplete:
diff --git a/tests/test_cloud_checkpoints_issue574/Snakefile b/tests/test_cloud_checkpoints_issue574/Snakefile
new file mode 100644
index 000000000..8c2692c5d
--- /dev/null
+++ b/tests/test_cloud_checkpoints_issue574/Snakefile
@@ -0,0 +1,32 @@
+import os
+
+# This test file is adapted from this one:
+# https://github.com/snakemake/snakemake/blob/758fabdb64255f8ca79e9c1483ceab67eb39ff07/tests/test_google_lifesciences/Snakefile
+from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
+GS = GSRemoteProvider()
+
+rule all:
+    input:
+        "landsat-data.txt.bz2"
+
+checkpoint copy:
+    input:
+        GS.remote("gcp-public-data-landsat/LC08/01/001/003/LC08_L1GT_001003_20170430_20170501_01_RT/LC08_L1GT_001003_20170430_20170501_01_RT_MTL.txt")
+    output:
+        "landsat-data.txt"
+    resources:
+        mem_mb=100
+    run:
+        shell("cp {input} {output}")
+
+rule pack:
+    input:
+        lambda wildcards: checkpoints.copy.get().output[0]
+    output:
+        "landsat-data.txt.bz2"
+    conda:
+        "env.yml"
+    log:
+        "logs/pack.log"
+    shell:
+        "bzip2 -c {input} > {output}; echo successful > {log}"
diff --git a/tests/test_cloud_checkpoints_issue574/config.json b/tests/test_cloud_checkpoints_issue574/config.json
new file mode 100644
index 000000000..a56a3bb85
--- /dev/null
+++ b/tests/test_cloud_checkpoints_issue574/config.json
@@ -0,0 +1 @@
+{"message": "hahaha"}
diff --git a/tests/test_cloud_checkpoints_issue574/env.yml b/tests/test_cloud_checkpoints_issue574/env.yml
new file mode 100644
index 000000000..169599737
--- /dev/null
+++ b/tests/test_cloud_checkpoints_issue574/env.yml
@@ -0,0 +1,4 @@
+channels:
+  - conda-forge
+dependencies:
+  - bzip2
diff --git a/tests/test_cloud_checkpoints_issue574/expected-results/.gitkeep b/tests/test_cloud_checkpoints_issue574/expected-results/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_google_lifesciences.py b/tests/test_google_lifesciences.py
index 3d3b815c0..47f34d0cb 100644
--- a/tests/test_google_lifesciences.py
+++ b/tests/test_google_lifesciences.py
@@ -90,3 +90,22 @@ def test_touch_remote_prefix():
         )
     finally:
         cleanup_google_storage(storage_prefix, bucket_name)
+
+
+@google_credentials
+def test_cloud_checkpoints_issue574():
+    """see Github issue #574"""
+    bucket_name = "snakemake-testing-%s" % next(tempfile._get_candidate_names())
+    create_google_storage(bucket_name)
+    storage_prefix = "test_cloud_checkpoints_issue574"
+    workdir = dpath("test_cloud_checkpoints_issue574")
+    try:
+        run(
+            workdir,
+            use_conda=True,
+            default_remote_prefix="%s/%s" % (bucket_name, storage_prefix),
+            google_lifesciences=True,
+            google_lifesciences_cache=False,
+        )
+    finally:
+        cleanup_google_storage(storage_prefix, bucket_name)