/
XRootD.py
289 lines (241 loc) · 9 KB
/
XRootD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
__author__ = "Chris Burr"
__copyright__ = "Copyright 2022, Chris Burr"
__email__ = "christopher.burr@cern.ch"
__license__ = "MIT"
import os
from os.path import abspath, join, normpath
import re
from stat import S_ISREG
from snakemake.remote import (
AbstractRemoteObject,
AbstractRemoteProvider,
AbstractRemoteRetryObject,
)
from snakemake.exceptions import WorkflowError, XRootDFileException
try:
from XRootD import client
from XRootD.client.flags import DirListFlags, MkDirFlags, StatInfoFlags
except ImportError as e:
raise WorkflowError(
"The Python 3 package 'XRootD' must be installed to use XRootD "
"remote() file functionality. %s" % e.msg
)
class RemoteProvider(AbstractRemoteProvider):
    """Snakemake remote provider for files reachable over XRootD.

    The provider creates one ``XRootDHelper`` on construction and hands it
    out through ``remote_interface()``.
    """

    supports_default = True

    def __init__(
        self, *args, keep_local=False, stay_on_remote=False, is_default=False, **kwargs
    ):
        """Forward all options to the base provider and create the helper."""
        super().__init__(
            *args,
            keep_local=keep_local,
            stay_on_remote=stay_on_remote,
            is_default=is_default,
            **kwargs,
        )
        self._xrd = XRootDHelper()

    def remote_interface(self):
        """Return the ``XRootDHelper`` owned by this provider."""
        return self._xrd

    @property
    def default_protocol(self):
        """Protocol prefix prepended to paths given without a protocol."""
        return "root://"

    @property
    def available_protocols(self):
        """Protocol prefixes accepted by this remote provider."""
        return ["root://", "roots://", "rootk://"]
class RemoteObject(AbstractRemoteRetryObject):
    """Remote file object whose operations are delegated to an XRootD helper."""

    def __init__(
        self, *args, keep_local=False, stay_on_remote=False, provider=None, **kwargs
    ):
        """Initialise the base object and pick the helper to talk through.

        The helper comes from ``provider.remote_interface()`` when a provider
        is given; otherwise a fresh ``XRootDHelper`` is created.
        """
        super().__init__(
            *args,
            keep_local=keep_local,
            stay_on_remote=stay_on_remote,
            provider=provider,
            **kwargs,
        )
        self._xrd = provider.remote_interface() if provider else XRootDHelper()

    # === Implementations of abstract class members ===

    def exists(self):
        """Return whether the remote file can be found by the helper."""
        return self._xrd.exists(self.remote_file())

    def mtime(self):
        """Return the remote modification time.

        Raises:
            XRootDFileException: if the remote file does not exist.
        """
        if not self.exists():
            raise XRootDFileException(
                "The file does not seem to exist remotely: %s" % self.remote_file()
            )
        return self._xrd.file_last_modified(self.remote_file())

    def size(self):
        """Return the remote size, or the local size if the remote is missing."""
        if not self.exists():
            return self._iofile.size_local
        return self._xrd.file_size(self.remote_file())

    def _download(self):
        """Copy the remote file to its local location."""
        assert not self.stay_on_remote
        remote, local = self.remote_file(), self.file()
        self._xrd.copy(remote, local)

    def _upload(self):
        """Copy the local file to its remote location."""
        assert not self.stay_on_remote
        local, remote = self.file(), self.remote_file()
        self._xrd.copy(local, remote)

    @property
    def name(self):
        """The local file name of this object."""
        return self.local_file()

    @property
    def list(self):
        """Normalised paths of all files found recursively below the
        directory of this object's constant prefix."""
        prefix_dir = os.path.dirname(self._iofile.constant_prefix()) + "/"
        return [
            normpath(entry)
            for entry in self._xrd.list_directory_recursive(prefix_dir)
        ]

    def remove(self):
        """Delete the remote file."""
        self._xrd.remove(self.remote_file())
class XRootDHelper(object):
    """Convenience wrapper around the XRootD Python client.

    One ``client.FileSystem`` is created per domain and cached for reuse.
    Remote locations are given as URLs of the form
    ``<protocol>://<host>[:port]/<path>``.
    """

    # Compiled once. A raw string is used so the backslash escapes reach the
    # regex engine unchanged instead of being invalid string escapes.
    _URL_PATTERN = re.compile(
        r"(?P<domain>(?:[A-Za-z]+://)[A-Za-z0-9:@\_\-\.]+\:?/)(?P<path>.+)"
    )

    # Status errno that exists() interprets as "no such entry".
    # NOTE(review): presumably XRootD's kXR_NotFound -- confirm against the
    # protocol specification.
    _ERRNO_NOT_FOUND = 3011

    def __init__(self):
        # Maps domain string -> client.FileSystem instance.
        self._clients = {}

    def get_client(self, domain):
        """Return the cached ``client.FileSystem`` for *domain*, creating it
        on first use."""
        try:
            return self._clients[domain]
        except KeyError:
            filesystem = self._clients[domain] = client.FileSystem(domain)
            return filesystem

    def _parse_url(self, url):
        """Split *url* into ``(domain, dirname, filename)``.

        Returns:
            ``None`` if *url* does not match the remote URL pattern (e.g. it
            is a local path); otherwise a 3-tuple where ``domain`` ends with
            ``/`` and ``dirname`` both starts and ends with ``/``.
        """
        match = self._URL_PATTERN.search(url)
        if match is None:
            return None

        domain = match.group("domain")
        dirname, filename = os.path.split(match.group("path"))
        # We need a trailing / to keep XRootD happy
        dirname += "/"
        # and also make sure we supply a non-relative path
        # (snakemake removes double-slash // characters)
        if not dirname.startswith("/"):
            dirname = "/" + dirname
        return domain, dirname, filename

    def _parse_remote_url(self, url):
        """Like ``_parse_url`` but raise instead of returning ``None``.

        Raises:
            XRootDFileException: if *url* is not a recognisable remote URL.
        """
        parsed = self._parse_url(url)
        if parsed is None:
            raise XRootDFileException("Not a valid XRootD URL: " + str(url))
        return parsed

    def exists(self, url):
        """Return ``True`` if a stat of *url* succeeds, ``False`` if the
        server reports the not-found errno.

        Any entry that can be stat-ed counts as existing; the stat flags
        (directory / other) are not inspected.

        Raises:
            XRootDFileException: if *url* cannot be parsed or the stat fails
                with any other error.
        """
        domain, dirname, filename = self._parse_remote_url(url)
        path = os.path.join(dirname, filename)
        status, stat_info = self.get_client(domain).stat(path)
        if status.ok:
            return True
        if status.errno == self._ERRNO_NOT_FOUND:
            return False
        raise XRootDFileException(
            "Error stating URL "
            + path
            + " on domain "
            + domain
            + "\n"
            + repr(status)
            + "\n"
            + repr(stat_info)
        )

    def _get_statinfo(self, url):
        """Return the stat info of *url* as found in its directory listing.

        Raises:
            XRootDFileException: if *url* cannot be parsed, no listing entry
                has the requested name, or several entries match with
                differing stat info.
        """
        domain, dirname, filename = self._parse_remote_url(url)
        matches = [
            f for f in self.list_directory(domain, dirname) if f.name == filename
        ]
        # Explicit raise rather than assert: this depends on remote state and
        # must still be checked when Python runs with -O.
        if not matches:
            raise XRootDFileException(
                "No entry named "
                + filename
                + " found in directory "
                + dirname
                + " on domain "
                + domain
            )

        reference = matches[0].statinfo
        if len(matches) > 1:
            # -- check matches for consistency
            # There is a transient effect in XRootD
            # where a file may match more than once.
            # This is okay as long as the statinfo
            # is the same for all of them.
            # We only need to check front-facing attributes.
            public_attrs = [
                attr
                for attr in dir(reference)
                if not (attr.startswith("_") or attr.endswith("__"))
            ]
            consistent = all(
                getattr(m.statinfo, attr) == getattr(reference, attr)
                for m in matches[1:]
                for attr in public_attrs
            )
            if not consistent:
                raise XRootDFileException(
                    "Inconsistent stat information for "
                    + filename
                    + " in directory "
                    + dirname
                    + " on domain "
                    + domain
                )
        return reference

    def file_last_modified(self, filename):
        """Return the ``modtime`` of the remote URL *filename*."""
        return self._get_statinfo(filename).modtime

    def file_size(self, filename):
        """Return the ``size`` of the remote URL *filename*."""
        return self._get_statinfo(filename).size

    def copy(self, source, destination):
        """Copy *source* to *destination*; either side may be local or remote.

        A side that does not parse as a remote URL is treated as a local path
        and made absolute. Missing destination directories are created.

        Raises:
            XRootDFileException: if the copy process reports a failure.
        """
        # Prepare the source path for XRootD
        parsed_source = self._parse_url(source)
        if parsed_source is None:
            source = abspath(source)
        else:
            domain, dirname, filename = parsed_source
            source = f"{domain}/{dirname}/{filename}"

        # Prepare the destination path for XRootD
        assert os.path.basename(source) == os.path.basename(destination)
        parsed_destination = self._parse_url(destination)
        if parsed_destination is not None:
            domain, dirname, filename = parsed_destination
            destination = f"{domain}/{dirname}/{filename}"
            self.makedirs(domain, dirname)
        else:
            destination = abspath(destination)
            # exist_ok avoids a check-then-create race on the directory.
            os.makedirs(os.path.dirname(destination), exist_ok=True)

        # Perform the copy operation
        process = client.CopyProcess()
        process.add_job(source, destination, force=True)
        process.prepare()
        status, returns = process.run()
        if not status.ok or not returns[0]["status"].ok:
            # Single message argument, consistent with the other errors
            # raised by this class.
            raise XRootDFileException(
                "Error copying from "
                + source
                + " to "
                + destination
                + "\n"
                + repr(status)
                + "\n"
                + repr(returns)
            )

    def makedirs(self, domain, dirname):
        """Create *dirname* (which must end with ``/``) on *domain*, using
        the ``MkDirFlags.MAKEPATH`` flag.

        Raises:
            XRootDFileException: if the mkdir request fails.
        """
        # NOTE(review): this progress message goes to stdout via print();
        # consider routing it through the project's logger.
        print("Making directories", domain, dirname)
        assert dirname.endswith("/")
        status, _ = self.get_client(domain).mkdir(dirname, MkDirFlags.MAKEPATH)
        if not status.ok:
            raise XRootDFileException(
                "Failed to create directory " + dirname + "\n" + repr(status)
            )

    def list_directory(self, domain, dirname):
        """Return the entries of *dirname* on *domain*, requested with the
        ``DirListFlags.STAT`` flag.

        Raises:
            XRootDFileException: if the listing request fails.
        """
        status, dirlist = self.get_client(domain).dirlist(dirname, DirListFlags.STAT)
        if not status.ok:
            raise XRootDFileException(
                "Error listing directory "
                + dirname
                + " on domain "
                + domain
                + "\n"
                + repr(status)
                + "\n"
                + repr(dirlist)
            )
        return dirlist.dirlist

    def list_directory_recursive(self, start_url):
        """Yield the URL of every file below the directory *start_url*.

        *start_url* must end with ``/``. Sub-directories are descended into
        but not yielded themselves.

        Raises:
            XRootDFileException: if *start_url* cannot be parsed or a listing
                request fails.
        """
        assert start_url.endswith("/")
        domain, dirname, filename = self._parse_remote_url(start_url)
        assert not filename
        for entry in self.list_directory(domain, dirname):
            if entry.statinfo.flags & StatInfoFlags.IS_DIR:
                yield from self.list_directory_recursive(
                    domain + dirname + entry.name + "/"
                )
            else:
                # Only yield files as directories don't have timestamps on XRootD
                yield domain + dirname + entry.name

    def remove(self, url):
        """Delete the remote file at *url*.

        Raises:
            XRootDFileException: if *url* cannot be parsed or the rm request
                fails.
        """
        domain, dirname, filename = self._parse_remote_url(url)
        path = join(dirname, filename)
        status, _ = self.get_client(domain).rm(path)
        if not status.ok:
            raise XRootDFileException(
                "Failed to remove file "
                + path
                + " from remote "
                + domain
                + "\n"
                + repr(status)
            )