From dd27209b4a600d3704cabc39776dfef718129197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Tue, 1 Mar 2022 18:05:32 +0100 Subject: [PATCH 1/2] fix: set mtime for cached source files [WIP] (#1443) * fix: set mtime for cached source files * Implement mtime for local files and github. * logging * fix: remove abstract flag * fix import * keep mtime if we cannot determine it * get latest commit * fix datetime handling for github case --- snakemake/sourcecache.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/snakemake/sourcecache.py b/snakemake/sourcecache.py index 9b0c839d9..ff08fe692 100644 --- a/snakemake/sourcecache.py +++ b/snakemake/sourcecache.py @@ -11,6 +11,7 @@ import tempfile import io from abc import ABC, abstractmethod +from datetime import datetime from snakemake.common import ( @@ -60,6 +61,10 @@ def join(self, path): path = path.get_path_or_uri() return self.__class__(smart_join(self.get_path_or_uri(), path)) + def mtime(self): + """If possible, return mtime of the file. Otherwise, return None.""" + return None + def __hash__(self): return self.get_path_or_uri().__hash__() @@ -111,6 +116,9 @@ def isabs(self): def simplify_path(self): return utils.simplify_path(self.path) + def mtime(self): + return os.stat(self.path).st_mtime + def __fspath__(self): return self.path @@ -225,6 +233,18 @@ class GithubFile(HostingProviderFile): def get_path_or_uri(self): return "https://github.com/{}/raw/{}/{}".format(self.repo, self.ref, self.path) + def mtime(self): + import requests + + url = f"https://api.github.com/repos/{self.repo}/commits?path={self.path}&page=1&per_page=1" + mtime = requests.get(url).json()[0]["commit"]["committer"]["date"] + assert mtime.endswith( + "Z" + ), "bug: expected suffix Z on Github provided time stamp" + # assume UTC and make it understandable to fromisoformat + mtime = mtime[:-1] + "+00:00" + return datetime.fromisoformat(mtime).timestamp() + class GitlabFile(HostingProviderFile): def __init__( @@ -345,6 +365,14 @@ def _do_cache(self, source_file, cache_entry): ) as cache_source: cache_source.write(source.read()) + mtime = source_file.mtime() + if mtime is not None: + # Set to mtime of original file + # In case we don't have that mtime, it is fine + # to just keep the time at the time of caching + # as mtime. + os.utime(cache_entry, times=(mtime, mtime)) + def _open(self, path_or_uri, mode): from smart_open import open From 82666f1b2b043f0a8de739d7027aba66eccdaee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Tue, 1 Mar 2022 18:06:19 +0100 Subject: [PATCH 2/2] fix: failure to properly apply default remote prefix in combination with the unpack marker (#1448) * fix: failure to properly apply default remote prefix in combination with the unpack marker * only in case of from_callable --- snakemake/rules.py | 14 ++++++++------ tests/test_github_issue1396/Snakefile | 16 ++++++++++++++++ .../expected-results/.gitkeep | 0 tests/test_google_lifesciences.py | 17 +++++++++++++++++ 4 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 tests/test_github_issue1396/Snakefile create mode 100644 tests/test_github_issue1396/expected-results/.gitkeep diff --git a/snakemake/rules.py b/snakemake/rules.py index 85f2a3734..1c478cbe7 100644 --- a/snakemake/rules.py +++ b/snakemake/rules.py @@ -787,8 +787,6 @@ def _apply_wildcards( groupid=groupid, **aux_params ) - if apply_path_modifier and not incomplete: - item = self.apply_path_modifier(item, property=property) if is_unpack and not incomplete: if not allow_unpack: @@ -808,14 +806,14 @@ def _apply_wildcards( ) # Allow streamlined code with/without unpack if isinstance(item, list): - pairs = zip([None] * len(item), item) + pairs = zip([None] * len(item), item, [_is_callable] * len(item)) else: assert isinstance(item, dict) - pairs = item.items() + pairs = [(name, item, _is_callable) for name, item in item.items()] else: - pairs = [(name, item)] + pairs = [(name, item, _is_callable)] - for name, item in pairs: + for name, item, from_callable in pairs: is_iterable = True if not_iterable(item) or no_flattening: item = [item] @@ -829,6 +827,10 @@ def _apply_wildcards( raise WorkflowError( "Function did not return str or list " "of str.", rule=self ) + + if from_callable and apply_path_modifier and not incomplete: + item_ = self.apply_path_modifier(item_, property=property) + concrete = concretize(item_, wildcards, _is_callable) newitems.append(concrete) if mapping is not None: diff --git a/tests/test_github_issue1396/Snakefile b/tests/test_github_issue1396/Snakefile new file mode 100644 index 000000000..434704d7f --- /dev/null +++ b/tests/test_github_issue1396/Snakefile @@ -0,0 +1,16 @@ +def get_files(wildcards): + files_1 = expand("file_{i}", i=list(range(1, 5))) + files_2 = expand("file_{i}", i=list(range(5, 9))) + return {"files_1": files_1, "files_2": files_2} + + +rule all: + input: + unpack(get_files), + + +rule make_files: + output: + expand("file_{i}", i=list(range(1, 9))), + shell: + "touch {output}" diff --git a/tests/test_github_issue1396/expected-results/.gitkeep b/tests/test_github_issue1396/expected-results/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_google_lifesciences.py b/tests/test_google_lifesciences.py index 47f34d0cb..4c6366e95 100644 --- a/tests/test_google_lifesciences.py +++ b/tests/test_google_lifesciences.py @@ -109,3 +109,20 @@ def test_cloud_checkpoints_issue574(): ) finally: cleanup_google_storage(storage_prefix, bucket_name) + + +def test_github_issue1396(): + bucket_name = "snakemake-testing-%s" % next(tempfile._get_candidate_names()) + create_google_storage(bucket_name) + storage_prefix = "test_github_issue1396" + workdir = dpath("test_github_issue1396") + try: + run( + workdir, + default_remote_prefix="%s/%s" % (bucket_name, storage_prefix), + google_lifesciences=True, + google_lifesciences_cache=False, + dryrun=True, + ) + finally: + cleanup_google_storage(storage_prefix, bucket_name)