Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: set mtime for cached source files [WIP] #1443

Merged
merged 8 commits into from Mar 1, 2022
Merged
28 changes: 28 additions & 0 deletions snakemake/sourcecache.py
Expand Up @@ -11,6 +11,7 @@
import tempfile
import io
from abc import ABC, abstractmethod
from datetime import datetime


from snakemake.common import (
Expand Down Expand Up @@ -60,6 +61,10 @@ def join(self, path):
path = path.get_path_or_uri()
return self.__class__(smart_join(self.get_path_or_uri(), path))

def mtime(self):
    """Return the modification time of the file, or None if it is unknown.

    This implementation has no way of determining a modification time and
    therefore always returns None.
    """
    return None

def __hash__(self):
    """Hash the object by the value returned from ``get_path_or_uri()``."""
    identifier = self.get_path_or_uri()
    return identifier.__hash__()

Expand Down Expand Up @@ -111,6 +116,9 @@ def isabs(self):
def simplify_path(self):
    """Return ``self.path`` after passing it through ``utils.simplify_path``."""
    simplified = utils.simplify_path(self.path)
    return simplified

def mtime(self):
    """Return the modification time of the file at ``self.path``.

    The value is the ``st_mtime`` reported by the operating system, i.e.
    seconds since the epoch. Raises ``OSError`` if the path cannot be
    stat'ed.
    """
    # os.path.getmtime is a thin wrapper around os.stat(...).st_mtime.
    return os.path.getmtime(self.path)

def __fspath__(self):
    """Return ``self.path`` as the file system representation of this object.

    Implementing this hook lets the object be passed to ``os.fspath`` and
    to APIs that accept path-like objects.
    """
    fs_path = self.path
    return fs_path

Expand Down Expand Up @@ -225,6 +233,18 @@ class GithubFile(HostingProviderFile):
def get_path_or_uri(self):
    """Build the github.com "raw" URL for this file.

    The URL is assembled from ``self.repo``, ``self.ref`` and ``self.path``,
    in that order.
    """
    repo, ref, path = self.repo, self.ref, self.path
    return "https://github.com/{}/raw/{}/{}".format(repo, ref, path)

def mtime(self):
    """Return the date of the latest commit touching this file as a timestamp.

    The GitHub commits API is queried for the single most recent commit
    that affects ``self.path`` in ``self.repo``. The committer date of that
    commit is converted to seconds since the epoch.

    Returns:
        The committer date as a float POSIX timestamp, or None if the
        response does not contain a commit (e.g. an API error payload or
        an empty commit list), in which case no mtime can be determined.

    Raises:
        ValueError: if the time stamp delivered by GitHub cannot be parsed.
    """
    import requests
    from datetime import timezone

    url = f"https://api.github.com/repos/{self.repo}/commits"
    # Pass the query as params so that requests URL-encodes the path;
    # characters like spaces, '&' or '#' would otherwise corrupt the query.
    params = {"path": self.path, "page": 1, "per_page": 1}
    # NOTE(review): the query does not restrict the listing to self.ref, so
    # commits are presumably taken from the default branch -- confirm whether
    # a "sha" parameter should be passed as well.
    commits = requests.get(url, params=params).json()

    if not isinstance(commits, list) or not commits:
        # Error responses are JSON objects rather than lists, and a file
        # without matching commits yields an empty list. Either way there is
        # no mtime to report.
        return None

    stamp = commits[0]["commit"]["committer"]["date"]
    if stamp.endswith("Z"):
        # "Z" denotes UTC; spell it as an explicit offset so that
        # fromisoformat understands it on all supported Python versions.
        stamp = stamp[:-1] + "+00:00"
    parsed = datetime.fromisoformat(stamp)
    if parsed.tzinfo is None:
        # Without an offset, timestamp() would assume local time; treat the
        # value as UTC instead.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.timestamp()


class GitlabFile(HostingProviderFile):
def __init__(
Expand Down Expand Up @@ -345,6 +365,14 @@ def _do_cache(self, source_file, cache_entry):
) as cache_source:
cache_source.write(source.read())

mtime = source_file.mtime()
if mtime is not None:
# Set to mtime of original file
# In case we don't have that mtime, it is fine
# to just keep the time at the time of caching
# as mtime.
os.utime(cache_entry, times=(mtime, mtime))

def _open(self, path_or_uri, mode):
from smart_open import open

Expand Down