/
aws.py
348 lines (311 loc) · 13.2 KB
/
aws.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
""" Interface to AWS S3 storage"""
import json
import os
import sys
import requests
import shutil
# Bucket used by download() when the caller does not name one
kAwsBucketPrivate = 'linn-artifacts-private'
# Location the AWS credentials file is fetched from when no local
# ~/.aws/credentials exists
kAwsLinnCredsUri = 'http://core.linn.co.uk/network/raw-attachment/wiki/WikiStart/credentials'
# Metadata endpoint queried at import time; its 'InstanceProfileArn' field is
# used to recognise a dev-tools EC2 slave instance
kAwsMetadataService = 'http://169.254.169.254/latest/meta-data/iam/info'
try:
    import boto3
except ImportError:
    # boto3 is a hard requirement for every S3 operation in this module; tell
    # the user how to obtain it rather than failing with a bare traceback here
    print('\nAWS fetch requires boto3 module')
    print("Please install this using 'pip install boto3'\n")
else:
    # Ask the metadata service whether this machine carries the dev-tools EC2
    # slave instance profile. The short timeout keeps the import fast on any
    # machine where the endpoint is unreachable.
    awsSlave = False
    try:
        resp = requests.get(kAwsMetadataService, timeout=1)
        meta = json.loads(resp.text)
        if 'dev-tools-EC2SlaveInstanceProfile' in meta.get('InstanceProfileArn', ''):
            awsSlave = True
    except Exception:
        # best effort: unreachable endpoint or unexpected payload simply means
        # "not a slave" (Exception, not bare except, so Ctrl-C still works)
        pass

    if not awsSlave:
        # create AWS credentials file (if not already present)
        # NOTE(review): presumably slaves obtain credentials from their
        # instance profile, hence no file is needed there - confirm
        home = None
        awsCreds = ''
        if 'HOMEPATH' in os.environ and 'HOMEDRIVE' in os.environ:
            home = os.path.join(os.environ['HOMEDRIVE'], os.environ['HOMEPATH'])
        elif 'HOME' in os.environ:
            home = os.environ['HOME']

        if home:
            awsCreds = os.path.join(home, '.aws', 'credentials')
            if not os.path.exists(awsCreds):
                try:
                    os.mkdir(os.path.join(home, '.aws'))
                except OSError:
                    # directory already exists, or cannot be created - in the
                    # latter case writing the file below fails and the final
                    # existence check reports the problem
                    pass
                try:
                    # timeout prevents the import hanging forever when the
                    # Linn HQ network is not reachable
                    resp = requests.get(kAwsLinnCredsUri, timeout=10)
                    if resp.status_code == 200:
                        creds = resp.text
                        with open(awsCreds, 'wt') as f:
                            f.write(creds)
                except Exception:
                    # best effort: the check below produces the error message
                    pass

        if not os.path.exists(awsCreds):
            print('ERROR: No AWS credentials, and unable to fetch them (need connection to Linn HQ network)')
            sys.exit(-1)
# ------------------------------------------------------------------------------
# 'Private' class to manage AWS using boto3 - public interface at end of file
# ------------------------------------------------------------------------------
class __aws:
    """Wrapper around the boto3 S3 resource and client.

    Locations containing 's3://' are parsed as 's3://<bucket>/<key>'; any
    other location is treated as a path on the local filesystem.
    """

    # Content-Type set on upload, keyed by lower-case file extension; files
    # with any other extension are uploaded without an explicit Content-Type
    __kContentTypes = {
        '.txt': 'text/plain',
        '.json': 'text/plain',
        '.xml': 'text/plain',
        '.htm': 'text/html',
        '.html': 'text/html',
    }

    def __init__(self):
        self.s3 = boto3.resource('s3')
        self.client = boto3.client('s3')

    def _copy(self, aSrc, aDst):
        """Copy a single file: S3 -> S3, S3 -> disk, disk -> S3 or disk -> disk.

        The parent directory of a local destination is created when missing.
        """
        srcIsS3 = self.__isS3(aSrc)
        dstIsS3 = self.__isS3(aDst)
        if srcIsS3 and dstIsS3:
            bucketSrc, keySrc = self.__splitUri(aSrc)
            bucketDst, keyDst = self.__splitUri(aDst)
            self.client.copy_object(Bucket=bucketDst, Key=keyDst, CopySource="%s/%s" % (bucketSrc, keySrc))
        elif srcIsS3:
            bucketName, key = self.__splitUri(aSrc)
            obj = self.s3.Bucket(bucketName).Object(key)
            self.__ensureDir(os.path.dirname(aDst))
            with open(aDst, 'wb') as data:
                obj.download_fileobj(data)
        elif dstIsS3:
            bucketName, key = self.__splitUri(aDst)
            # splitext (rather than split('.')) so an extension-less file
            # called e.g. 'txt' is not mistaken for a text file
            contentType = self.__kContentTypes.get(os.path.splitext(aSrc)[1].lower())
            extraArgs = {'ContentType': contentType} if contentType else None
            with open(aSrc, 'rb') as data:
                self.s3.Bucket(bucketName).upload_fileobj(data, key, ExtraArgs=extraArgs)
        else:
            self.__ensureDir(os.path.dirname(aDst))
            shutil.copyfile(aSrc, aDst)

    def _delete(self, aItem):
        """Delete a local file, or every S3 object whose key starts with the given key."""
        if self.__isS3(aItem):
            bucket, key = self.__splitUri(aItem)
            if key:     # never wipe an entire bucket
                # this allows a single file to be deleted or an entire directory, so be careful!
                self.s3.Bucket(bucket).objects.filter(Prefix=key).delete()
        else:
            os.unlink(aItem)

    def _download(self, aKey, aDestinationFile, aBucket=None):
        """Download key aKey from aBucket (default kAwsBucketPrivate) to a local file."""
        if aBucket is None:
            # resolved at call time so the module constant is honoured
            aBucket = kAwsBucketPrivate
        key = aKey.strip("/")
        print('Download from AWS s3://%s/%s to %s' % (aBucket, key, os.path.abspath(aDestinationFile)))
        bucket = self.s3.Bucket(aBucket)
        with open(aDestinationFile, 'wb') as data:
            bucket.download_fileobj(key, data)

    def _exists(self, aUri):
        """Return True if the S3 object named by aUri can be loaded, otherwise False.

        Any failure to load the object (missing, forbidden, no connection)
        is reported as False.
        """
        bucket, key = self.__splitUri(aUri)
        try:
            self.s3.Object(bucket, key).load()
        except Exception:
            return False
        return True

    def _listItems(self, aUri, aSort=None):
        """Return (non-recursive) directory listing of specified URI

        Sub-directory prefixes come first, then object keys. aSort may contain
        'asc' or 'desc' to order the result by version number; any other
        value leaves the listing order unchanged.
        """
        objects = self.__listObjs(aUri)
        entries = [item['Prefix'] for item in objects.get('CommonPrefixes', [])]
        entries.extend(item['Key'] for item in objects.get('Contents', []))
        if aSort is not None:
            entries = self.__sort(entries, aSort)
        return entries

    def _listItemsRecursive(self, aUri):
        """Return recursive directory listing of specified URI"""
        aUri = aUri.rstrip("/")
        entries = []
        objects = self.__listObjs(aUri)
        for item in objects.get('CommonPrefixes', []):
            entries.append(item['Prefix'])
            # a prefix ends in '/', so [-2] is the sub-directory's own name
            entries.extend(self._listItemsRecursive(aUri + '/' + item['Prefix'].split('/')[-2]))
        entries.extend(item['Key'] for item in objects.get('Contents', []))
        return entries

    def _listDetails(self, aUri):
        """Return detailed (non-recursive) directory listing of specified URI (ls -l)

        Directories are returned as {'key'}; objects as {'key', 'modified', 'size'}.
        """
        objects = self.__listObjs(aUri)
        entries = [{'key': item['Prefix']} for item in objects.get('CommonPrefixes', [])]
        entries.extend(self.__detail(item) for item in objects.get('Contents', []))
        return entries

    def _listDetailsRecursive(self, aUri):
        """Return detailed recursive directory listing of specified URI (ls -lr)"""
        # a trailing '/' would otherwise produce 'dir//sub' prefixes below,
        # which match nothing
        aUri = aUri.rstrip("/")
        entries = []
        objects = self.__listObjs(aUri)
        for item in objects.get('CommonPrefixes', []):
            entries.append({'key': item['Prefix']})
            entries.extend(self._listDetailsRecursive(aUri + '/' + item['Prefix'].split('/')[-2]))
        entries.extend(self.__detail(item) for item in objects.get('Contents', []))
        return entries

    def _move(self, aSrc, aDst):
        """Copy aSrc to aDst, then remove aSrc.

        Only the object that was copied is removed: the prefix-based _delete
        would also take out sibling keys that merely start with the same name.
        """
        self._copy(aSrc, aDst)
        if self.__isS3(aSrc):
            self.__deleteObject(aSrc)
        else:
            os.unlink(aSrc)

    def _rsync(self, aSrc, aDst):
        """Perform an rsync operation - mirror contents of aSrc to aDst, only
        transferring files which have changed (in terms of timestamp)

        A source file is skipped when the destination copy has a newer
        timestamp. Destination files with no counterpart in the source are
        deleted, and empty directories are pruned from a local destination.
        """
        srcFiles = self.__s3FileList(aSrc) if self.__isS3(aSrc) else self.__fsFileList(aSrc)
        dstFiles = self.__s3FileList(aDst) if self.__isS3(aDst) else self.__fsFileList(aDst)
        dstByName = dict((dst['name'], dst) for dst in dstFiles)
        srcNames = set(src['name'] for src in srcFiles)
        dstRoot = aDst.rstrip('/')

        for src in srcFiles:    # copy in new or updated src files to dst
            name = src['name']
            if 'size' not in src or not name or name.endswith('/'):
                continue        # directory entry or folder marker, not a file
            dst = dstByName.get(name)
            if dst is not None and 'modified' in dst and src['modified'] < dst['modified']:
                print('Skipping %s' % src['path'])
                continue
            dstPath = dstRoot + '/' + name
            print('Copying %s -> %s' % (src['path'], dstPath))
            self._copy(src['path'], dstPath)

        for dst in dstFiles:    # remove dst files not present in src list
            if 'size' not in dst or not dst['name'] or dst['name'] in srcNames:
                continue
            print('Deleting %s' % dst['path'])
            if self.__isS3(dst['path']):
                self.__deleteObject(dst['path'])
            else:
                os.unlink(dst['path'])

        if not self.__isS3(aDst):
            # bottom-up, so a directory emptied by removing its empty
            # children is itself removed in the same pass
            for root, dirs, _files in os.walk(aDst, topdown=False):
                for name in dirs:
                    path = os.path.join(root, name)
                    if not os.listdir(path):
                        os.rmdir(path)

    # Helper methods ----------------------------------

    @staticmethod
    def __isS3(aUri):
        """Return True if aUri refers to S3 rather than the local filesystem."""
        return 's3://' in aUri

    @staticmethod
    def __splitUri(aUri):
        """Split 's3://bucket/some/key' into ('bucket', 'some/key')."""
        fields = aUri.split('/')
        return fields[2], '/'.join(fields[3:])

    @staticmethod
    def __ensureDir(aDir):
        """Create directory aDir (and parents) unless it is empty or already present."""
        if aDir and not os.path.isdir(aDir):
            try:
                os.makedirs(aDir)
            except OSError:
                if not os.path.isdir(aDir):     # lost a creation race? then fine
                    raise

    @staticmethod
    def __timestamp(aWhen):
        """Return aWhen as integer seconds since the epoch."""
        try:
            return int(aWhen.timestamp())
        except AttributeError:
            # handle obsolete python versions (but rsync method now unreliable)
            return str(aWhen)

    def __detail(self, aItem):
        """Convert one 'Contents' entry of an S3 listing to a details dict."""
        return {'key': aItem['Key'], 'modified': self.__timestamp(aItem['LastModified']), 'size': aItem['Size']}

    def __deleteObject(self, aUri):
        """Delete exactly one S3 object (no prefix matching)."""
        bucket, key = self.__splitUri(aUri)
        if key:
            self.s3.Object(bucket, key).delete()

    @staticmethod
    def __cmpKey(aStr):
        """Key to compare version numbers in format NN.NNN.NNNNN

        Uses the last path component of aStr, ignoring anything after the
        first '_'. Returns (major, minor, build), or (0, 0, 0) when the name
        does not start with three dot-separated integers.
        """
        verStr = aStr.strip("/").split("/")[-1]
        try:
            fields = verStr.split('_')[0].split('.')
            return (int(fields[0]), int(fields[1]), int(fields[2]))
        except (ValueError, IndexError):
            return (0, 0, 0)

    def __listObjs(self, aUri, aContents=None, aContinuationToken=None):
        """List one 'directory' level of aUri, following pagination.

        Returns the final list_objects_v2 response with 'Contents' replaced by
        the objects gathered from every page (appended to aContents when
        given) and, when any page reported sub-directories, 'CommonPrefixes'
        replaced by the prefixes gathered from every page.
        """
        contents = [] if aContents is None else aContents
        prefixes = []
        bucket, prefix = self.__splitUri(aUri)
        if prefix and not prefix.endswith('/'):
            prefix += '/'       # empty prefix is the top 'level' of bucket
        kwArgs = {'Bucket': bucket, 'Delimiter': '/', 'Prefix': prefix}
        token = aContinuationToken
        while True:
            if token:
                kwArgs['ContinuationToken'] = token
            resp = self.client.list_objects_v2(**kwArgs)
            contents.extend(resp.get('Contents', []))
            # prefixes must be accumulated too - each page only reports its own
            prefixes.extend(resp.get('CommonPrefixes', []))
            if not resp.get('IsTruncated'):
                break
            token = resp['NextContinuationToken']
        resp['Contents'] = contents
        if prefixes:
            resp['CommonPrefixes'] = prefixes
        return resp

    def __sort(self, aItems, aSort):
        """Return aItems ordered by version number.

        aSort containing 'asc' sorts ascending, 'desc' descending; any other
        value returns the items in their original order.
        """
        order = aSort.lower()
        if 'asc' in order:
            return sorted(aItems, key=self.__cmpKey)
        if 'desc' in order:
            return sorted(aItems, key=self.__cmpKey, reverse=True)
        return list(aItems)

    @staticmethod
    def __listDiskFileDetailsRecursive(aSrc):
        """Return {'dir', 'key', 'modified', 'size'} for every file below aSrc."""
        items = []
        for root, _dirs, files in os.walk(aSrc):
            for name in files:
                path = os.path.join(root, name)
                stat = os.stat(path)
                items.append({'dir': root, 'key': path, 'modified': int(stat.st_mtime), 'size': stat.st_size})
        return items

    def __s3FileList(self, aSrc):
        """Recursive S3 listing with 'path' (full URI) and 'name' (relative to aSrc) added."""
        bucket, prefix = self.__splitUri(aSrc)
        if prefix and not prefix.endswith('/'):
            prefix += '/'
        srcFiles = self._listDetailsRecursive(aSrc)
        for src in srcFiles:
            src['path'] = 's3://' + bucket + '/' + src['key']
            # every listed key starts with the listing prefix
            src['name'] = src['key'][len(prefix):]
        return srcFiles

    def __fsFileList(self, aSrc):
        """Recursive disk listing with 'path' and 'name' ('/'-separated, relative to aSrc) added."""
        srcFiles = self.__listDiskFileDetailsRecursive(aSrc)
        for src in srcFiles:
            src['path'] = src['key']
            # relpath copes with aSrc being given with a trailing separator
            src['name'] = os.path.relpath(src['key'], aSrc).replace('\\', '/')
        return srcFiles
# ------------------------------------------------------------------------------
# Public interface to AWS (commands and aliases)
# ------------------------------------------------------------------------------
# NOTE that exists() method will return False for directories as they do not
# actually exist as such on AWS, but are merely a prefix on existing keys
# Single shared instance; every public name below is a bound method of it
aws = __aws()

# Short, shell-style commands
cp = aws._copy
dir = aws._listItems    # shadows the builtin dir() inside this module; kept as it is part of the public interface
ls = aws._listItems
lsl = aws._listDetails
lsr = aws._listItemsRecursive
lslr = aws._listDetailsRecursive
mv = aws._move
rm = aws._delete

# Long-form aliases
copy = aws._copy
download = aws._download
delete = aws._delete
exists = aws._exists
# was bound to _listItems, which returned bare keys rather than details and
# disagreed with both 'lsl' and 'listDetailsRecursive'
listDetails = aws._listDetails
listDetailsRecursive = aws._listDetailsRecursive
listItems = aws._listItems
listItemsRecursive = aws._listItemsRecursive
move = aws._move
rsync = aws._rsync
if __name__ == "__main__":
    # Don't change this 'test harness' - something in Volkano2 build depends on it
    # Command-line use: <script> cp <src> <dst>
    # Only the 'cp' command is handled; any other command is silently ignored.
    # Arguments are indexed without a length check, so running with too few
    # arguments raises IndexError.
    import sys
    args = sys.argv
    if args[1] == "cp":
        cp(args[2], args[3])