Skip to content

Commit

Permalink
[ENG-4624] [S3 Improvements] Project PR - Waterbutler Part (#406)
Browse files Browse the repository at this point in the history
* allow the base folder to not be a bucket
* use colon delineation for S3 buckets
* fix folder paths
* add unit tests for s3 improvements
* more path fixes

---------

Co-authored-by: John Tordoff <Johnetordoff@users.noreply.github.com>
Co-authored-by: John Tordoff <johnetordoff@gmail.com>
  • Loading branch information
3 people committed Aug 17, 2023
1 parent 2745780 commit d5dab36
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 46 deletions.
1 change: 1 addition & 0 deletions tests/providers/s3/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def credentials():
@pytest.fixture
def settings():
return {
'id': 'that kerning:/my-subfolder/',
'bucket': 'that kerning',
'encrypt_uploads': False
}
Expand Down
146 changes: 111 additions & 35 deletions tests/providers/s3/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,53 +210,101 @@ class TestValidatePath:
async def test_validate_v1_path_file(self, provider, file_header_metadata, mock_time):
file_path = 'foobah'

params = {'prefix': '/' + file_path + '/', 'delimiter': '/'}
good_metadata_url = provider.bucket.new_key('/' + file_path).generate_url(100, 'HEAD')
bad_metadata_url = provider.bucket.generate_url(100)
aiohttpretty.register_uri('HEAD', good_metadata_url, headers=file_header_metadata)
aiohttpretty.register_uri('GET', bad_metadata_url, params=params, status=404)

assert WaterButlerPath('/') == await provider.validate_v1_path('/')
good_metadata_url_head = provider.bucket.new_key(f'/my-subfolder/{file_path}').generate_url(100, 'HEAD')
root_metadata_url = provider.bucket.new_key('/').generate_url(100, 'GET')
aiohttpretty.register_uri(
'GET',
root_metadata_url,
headers=file_header_metadata,
params={
'prefix': '/my-subfolder/',
'delimiter': '/'
}
)
aiohttpretty.register_uri(
'HEAD',
good_metadata_url_head,
headers=file_header_metadata,
)
aiohttpretty.register_uri(
'GET',
root_metadata_url,
headers=file_header_metadata,
params={
'prefix': f'/my-subfolder/{file_path}/',
'delimiter': '/'
}
)
assert WaterButlerPath('/my-subfolder/', prepend=None) == await provider.validate_v1_path('/')

try:
wb_path_v1 = await provider.validate_v1_path('/' + file_path)
except Exception as exc:
pytest.fail(str(exc))

with pytest.raises(exceptions.NotFoundError) as exc:
await provider.validate_v1_path('/' + file_path + '/')

assert exc.value.code == client.NOT_FOUND

wb_path_v0 = await provider.validate_path('/' + file_path)

assert wb_path_v1 == wb_path_v0

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_validate_v1_path_folder(self, provider, folder_metadata, mock_time):
folder_path = 'Photos'
async def test_validate_v1_path_file_with_subfolder(self, provider, file_header_metadata, mock_time):
file_path = '/foobah'

params = {'prefix': '/' + folder_path + '/', 'delimiter': '/'}
good_metadata_url = provider.bucket.generate_url(100)
bad_metadata_url = provider.bucket.new_key('/' + folder_path).generate_url(100, 'HEAD')
good_metadata_url_root = provider.bucket.new_key('/').generate_url(100, 'GET')
good_metadata_url = provider.bucket.new_key(file_path).generate_url(100, 'GET')
good_metadata_url_head = provider.bucket.new_key(f'/my-subfolder{file_path}').generate_url(100, 'HEAD')
aiohttpretty.register_uri(
'GET', good_metadata_url, params=params,
body=folder_metadata, headers={'Content-Type': 'application/xml'}
'GET',
good_metadata_url,
params={'delimiter': '/', 'prefix': '/my-subfolder/'},
headers=file_header_metadata
)
aiohttpretty.register_uri(
'GET',
good_metadata_url_root,
params={'delimiter': '/', 'prefix': '/my-subfolder/'},
headers=file_header_metadata
)
aiohttpretty.register_uri(
'HEAD',
good_metadata_url_head,
headers=file_header_metadata
)
aiohttpretty.register_uri('HEAD', bad_metadata_url, status=404)

try:
wb_path_v1 = await provider.validate_v1_path('/' + folder_path + '/')
except Exception as exc:
pytest.fail(str(exc))
assert WaterButlerPath('/my-subfolder/') == await provider.validate_v1_path('/')
wb_path_v1 = await provider.validate_v1_path(file_path)
wb_path_v0 = await provider.validate_path(file_path)

with pytest.raises(exceptions.NotFoundError) as exc:
await provider.validate_v1_path('/' + folder_path)
assert wb_path_v1 == wb_path_v0

assert exc.value.code == client.NOT_FOUND
@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_validate_v1_path_folder(self, provider, folder_metadata, mock_time):
folder_path = '/Photos'

good_metadata_url_root = provider.bucket.new_key('/').generate_url(100, 'GET')
good_metadata_url = provider.bucket.new_key(folder_path).generate_url(100, 'GET')
good_metadata_url_head = provider.bucket.new_key(f'/my-subfolder{folder_path}').generate_url(100, 'HEAD')
aiohttpretty.register_uri(
'GET',
good_metadata_url,
params={'delimiter': '/', 'prefix': '/my-subfolder/Photos/'},
headers=file_header_metadata
)
aiohttpretty.register_uri(
'GET',
good_metadata_url_root,
params={'delimiter': '/', 'prefix': '/my-subfolder/Photos/'},
)
aiohttpretty.register_uri(
'HEAD',
good_metadata_url_head,
headers=file_header_metadata
)

wb_path_v0 = await provider.validate_path('/' + folder_path + '/')
wb_path_v1 = await provider.validate_v1_path(folder_path + '/')
wb_path_v0 = await provider.validate_path(folder_path + '/')

assert wb_path_v1 == wb_path_v0

Expand All @@ -280,13 +328,12 @@ async def test_folder(self, provider, mock_time):
assert not path.is_root

@pytest.mark.asyncio
async def test_root(self, provider, mock_time):
async def test_subfolder(self, provider, mock_time):
path = await provider.validate_path('/')
assert path.name == ''
assert path.name == 'my-subfolder'
assert not path.is_file
assert path.is_dir
assert path.is_root

assert not path.is_root

class TestCRUD:

Expand Down Expand Up @@ -383,6 +430,35 @@ async def test_download_folder_400s(self, provider, mock_time):
await provider.download(WaterButlerPath('/cool/folder/mom/'))
assert e.value.code == 400

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_upload_to_subfolder_as_root(self,
provider,
file_content,
file_stream,
file_header_metadata,
mock_time
):

provider.settings['id'] = 'the-bucket:/my-subfolder/'
path = WaterButlerPath('/my-subfolder/foobah')

content_md5 = hashlib.md5(file_content).hexdigest()

url = provider.bucket.new_key(path.path).generate_url(100, 'PUT')
metadata_url = provider.bucket.new_key(path.path).generate_url(100, 'HEAD')
aiohttpretty.register_uri('HEAD', metadata_url, headers=file_header_metadata)
header = {'ETag': f'"{content_md5}"'}
aiohttpretty.register_uri('PUT', url, status=201, headers=header)

metadata, created = await provider.upload(file_stream, path)

assert metadata.kind == 'file'
assert metadata.path == '/foobah'
assert not created
assert aiohttpretty.has_call(method='PUT', uri=url)
assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_upload_update(self,
Expand Down Expand Up @@ -1260,17 +1336,17 @@ class TestOperations:

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
@pytest.mark.skip('Mocking too complicated')
async def test_intra_copy(self, provider, file_header_metadata, mock_time):

source_path = WaterButlerPath('/source')
dest_path = WaterButlerPath('/dest')
metadata_url = provider.bucket.new_key(dest_path.path).generate_url(100, 'HEAD')
metadata_url = provider.bucket.new_key('/my-subfolder/' + dest_path.path).generate_url(100, 'HEAD')
aiohttpretty.register_uri('HEAD', metadata_url, headers=file_header_metadata)

header_path = '/' + os.path.join(provider.settings['bucket'], source_path.path)
headers = {'x-amz-copy-source': parse.quote(header_path)}

url = provider.bucket.new_key(dest_path.path).generate_url(100, 'PUT', headers=headers)
url = provider.bucket.new_key('/my-subfolder/' + dest_path.path).generate_url(100, 'PUT', headers=headers)
aiohttpretty.register_uri('PUT', url, status=200)

metadata, exists = await provider.intra_copy(provider, source_path, dest_path)
Expand Down
14 changes: 11 additions & 3 deletions waterbutler/providers/s3/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
from waterbutler.core import metadata


def strip_char(str, chars):
if str.startswith(chars):
return str[len(chars):]
return str


class S3Metadata(metadata.BaseMetadata):

@property
Expand All @@ -24,7 +30,7 @@ def __init__(self, path, headers):

@property
def path(self):
return '/' + self._path
return '/' + strip_char(self._path, self.raw.get('base_folder', ''))

@property
def size(self):
Expand Down Expand Up @@ -62,7 +68,7 @@ class S3FileMetadata(S3Metadata, metadata.BaseFileMetadata):

@property
def path(self):
return '/' + self.raw['Key']
return '/' + strip_char(self.raw['Key'], self.raw.get('base_folder', ''))

@property
def size(self):
Expand Down Expand Up @@ -103,7 +109,7 @@ def name(self):

@property
def path(self):
return '/' + self.raw['Key']
return '/' + strip_char(self.raw['Key'], self.raw.get('base_folder', ''))


class S3FolderMetadata(S3Metadata, metadata.BaseFolderMetadata):
Expand All @@ -114,6 +120,8 @@ def name(self):

@property
def path(self):
if self.raw.get('base_folder', ''):
return '/' + strip_char(self.raw['Prefix'], self.raw.get('base_folder', ''))
return '/' + self.raw['Prefix']


Expand Down
26 changes: 18 additions & 8 deletions waterbutler/providers/s3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,15 @@ def __init__(self, auth, credentials, settings, **kwargs):
self.connection = S3Connection(credentials['access_key'],
credentials['secret_key'], calling_format=OrdinaryCallingFormat())
self.bucket = self.connection.get_bucket(settings['bucket'], validate=False)
self.base_folder = self.settings.get('id', ':/').split(':/')[1]
self.encrypt_uploads = self.settings.get('encrypt_uploads', False)
self.region = None

async def validate_v1_path(self, path, **kwargs):
await self._check_region()

if path == '/':
return WaterButlerPath(path)
# The user selected base folder, the root of the where that user's node is connected.
path = f"/{self.base_folder + path.lstrip('/')}"

implicit_folder = path.endswith('/')

Expand Down Expand Up @@ -98,7 +99,8 @@ async def validate_v1_path(self, path, **kwargs):
return WaterButlerPath(path)

async def validate_path(self, path, **kwargs):
return WaterButlerPath(path)
# The user selected base folder, the root of the where that user's node is connected.
return WaterButlerPath(f"/{self.base_folder + path.lstrip('/')}")

def can_duplicate_names(self):
return True
Expand Down Expand Up @@ -637,9 +639,14 @@ async def metadata(self, path, revision=None, **kwargs):
await self._check_region()

if path.is_dir:
return (await self._metadata_folder(path))
metadata = await self._metadata_folder(path)
for item in metadata:
item.raw['base_folder'] = self.base_folder
else:
metadata = await self._metadata_file(path, revision=revision)
metadata.raw['base_folder'] = self.base_folder

return (await self._metadata_file(path, revision=revision))
return metadata

async def create_folder(self, path, folder_precheck=True, **kwargs):
"""
Expand All @@ -661,7 +668,9 @@ async def create_folder(self, path, folder_precheck=True, **kwargs):
throws=exceptions.CreateFolderError
)

return S3FolderMetadata({'Prefix': path.path})
metadata = S3FolderMetadata({'Prefix': path.path})
metadata.raw['base_folder'] = self.base_folder
return metadata

async def _metadata_file(self, path, revision=None):
await self._check_region()
Expand All @@ -686,6 +695,7 @@ async def _metadata_folder(self, path):
await self._check_region()

params = {'prefix': path.path, 'delimiter': '/'}

resp = await self.make_request(
'GET',
functools.partial(self.bucket.generate_url, settings.TEMP_URL_SECS, 'GET', query_parameters=params),
Expand Down Expand Up @@ -721,11 +731,11 @@ async def _metadata_folder(self, path):

items = [
S3FolderMetadata(item)
for item in prefixes
for item in prefixes if item['Prefix'] != path.path
]

for content in contents:
if content['Key'] == path.path:
if content['Key'] == params['prefix']:
continue

if content['Key'].endswith('/'):
Expand Down

0 comments on commit d5dab36

Please sign in to comment.