Skip to content

Commit

Permalink
[S3 Improvements] More Path Fixes (#409)
Browse files Browse the repository at this point in the history
More Path Fixes

* fix issues with relative paths
* fix tests for s3 subfolder improvements
* fix dest path for s3
* take base_folder as part of __init__
* fix mocking for tests

---------

Co-authored-by: John Tordoff <johnetordoff@gmail.com>
  • Loading branch information
Johnetordoff and Johnetordoff committed Aug 16, 2023
1 parent e627806 commit 7683994
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 58 deletions.
1 change: 1 addition & 0 deletions tests/providers/s3/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def credentials():
@pytest.fixture
def settings():
return {
'id': 'that kerning:/my-subfolder/',
'bucket': 'that kerning',
'encrypt_uploads': False
}
Expand Down
116 changes: 74 additions & 42 deletions tests/providers/s3/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,36 +210,67 @@ class TestValidatePath:
async def test_validate_v1_path_file(self, provider, file_header_metadata, mock_time):
file_path = 'foobah'

params = {'prefix': '/' + file_path + '/', 'delimiter': '/'}
good_metadata_url = provider.bucket.new_key('/' + file_path).generate_url(100, 'HEAD')
bad_metadata_url = provider.bucket.generate_url(100)
aiohttpretty.register_uri('HEAD', good_metadata_url, headers=file_header_metadata)
aiohttpretty.register_uri('GET', bad_metadata_url, params=params, status=404)

assert WaterButlerPath('/') == await provider.validate_v1_path('/')
good_metadata_url_head = provider.bucket.new_key(f'/my-subfolder/{file_path}').generate_url(100, 'HEAD')
root_metadata_url = provider.bucket.new_key('/').generate_url(100, 'GET')
aiohttpretty.register_uri(
'GET',
root_metadata_url,
headers=file_header_metadata,
params={
'prefix': '/my-subfolder/',
'delimiter': '/'
}
)
aiohttpretty.register_uri(
'HEAD',
good_metadata_url_head,
headers=file_header_metadata,
)
aiohttpretty.register_uri(
'GET',
root_metadata_url,
headers=file_header_metadata,
params={
'prefix': f'/my-subfolder/{file_path}/',
'delimiter': '/'
}
)
assert WaterButlerPath('/my-subfolder/', prepend=None) == await provider.validate_v1_path('/')

try:
wb_path_v1 = await provider.validate_v1_path('/' + file_path)
except Exception as exc:
pytest.fail(str(exc))

with pytest.raises(exceptions.NotFoundError) as exc:
await provider.validate_v1_path('/' + file_path + '/')

assert exc.value.code == client.NOT_FOUND

wb_path_v0 = await provider.validate_path('/' + file_path)

assert wb_path_v1 == wb_path_v0

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_validate_v1_path_file_with_subfolder(self, provider, file_header_metadata, mock_time):
file_path = '/my-subfolder/foobah'
provider.settings['id'] = 'the-bucket:/my-subfolder/'
file_path = '/foobah'

good_metadata_url = provider.bucket.new_key(file_path).generate_url(100, 'HEAD')
aiohttpretty.register_uri('HEAD', good_metadata_url, headers=file_header_metadata)
good_metadata_url_root = provider.bucket.new_key('/').generate_url(100, 'GET')
good_metadata_url = provider.bucket.new_key(file_path).generate_url(100, 'GET')
good_metadata_url_head = provider.bucket.new_key(f'/my-subfolder{file_path}').generate_url(100, 'HEAD')
aiohttpretty.register_uri(
'GET',
good_metadata_url,
params={'delimiter': '/', 'prefix': '/my-subfolder/'},
headers=file_header_metadata
)
aiohttpretty.register_uri(
'GET',
good_metadata_url_root,
params={'delimiter': '/', 'prefix': '/my-subfolder/'},
headers=file_header_metadata
)
aiohttpretty.register_uri(
'HEAD',
good_metadata_url_head,
headers=file_header_metadata
)

assert WaterButlerPath('/my-subfolder/') == await provider.validate_v1_path('/')
wb_path_v1 = await provider.validate_v1_path(file_path)
Expand All @@ -250,28 +281,30 @@ async def test_validate_v1_path_file_with_subfolder(self, provider, file_header_
@pytest.mark.asyncio
@pytest.mark.aiohttpretty
async def test_validate_v1_path_folder(self, provider, folder_metadata, mock_time):
folder_path = 'Photos'
folder_path = '/Photos'

params = {'prefix': '/' + folder_path + '/', 'delimiter': '/'}
good_metadata_url = provider.bucket.generate_url(100)
bad_metadata_url = provider.bucket.new_key('/' + folder_path).generate_url(100, 'HEAD')
good_metadata_url_root = provider.bucket.new_key('/').generate_url(100, 'GET')
good_metadata_url = provider.bucket.new_key(folder_path).generate_url(100, 'GET')
good_metadata_url_head = provider.bucket.new_key(f'/my-subfolder{folder_path}').generate_url(100, 'HEAD')
aiohttpretty.register_uri(
'GET', good_metadata_url, params=params,
body=folder_metadata, headers={'Content-Type': 'application/xml'}
'GET',
good_metadata_url,
params={'delimiter': '/', 'prefix': '/my-subfolder/Photos/'},
headers=file_header_metadata
)
aiohttpretty.register_uri(
'GET',
good_metadata_url_root,
params={'delimiter': '/', 'prefix': '/my-subfolder/Photos/'},
)
aiohttpretty.register_uri(
'HEAD',
good_metadata_url_head,
headers=file_header_metadata
)
aiohttpretty.register_uri('HEAD', bad_metadata_url, status=404)

try:
wb_path_v1 = await provider.validate_v1_path('/' + folder_path + '/')
except Exception as exc:
pytest.fail(str(exc))

with pytest.raises(exceptions.NotFoundError) as exc:
await provider.validate_v1_path('/' + folder_path)

assert exc.value.code == client.NOT_FOUND

wb_path_v0 = await provider.validate_path('/' + folder_path + '/')
wb_path_v1 = await provider.validate_v1_path(folder_path + '/')
wb_path_v0 = await provider.validate_path(folder_path + '/')

assert wb_path_v1 == wb_path_v0

Expand All @@ -295,13 +328,12 @@ async def test_folder(self, provider, mock_time):
assert not path.is_root

@pytest.mark.asyncio
async def test_root(self, provider, mock_time):
async def test_subfolder(self, provider, mock_time):
path = await provider.validate_path('/')
assert path.name == ''
assert path.name == 'my-subfolder'
assert not path.is_file
assert path.is_dir
assert path.is_root

assert not path.is_root

class TestCRUD:

Expand Down Expand Up @@ -422,7 +454,7 @@ async def test_upload_to_subfolder_as_root(self,
metadata, created = await provider.upload(file_stream, path)

assert metadata.kind == 'file'
assert metadata.path == '/my-subfolder/foobah'
assert metadata.path == '/foobah'
assert not created
assert aiohttpretty.has_call(method='PUT', uri=url)
assert aiohttpretty.has_call(method='HEAD', uri=metadata_url)
Expand Down Expand Up @@ -1304,17 +1336,17 @@ class TestOperations:

@pytest.mark.asyncio
@pytest.mark.aiohttpretty
@pytest.mark.skip('Mocking too complicated')
async def test_intra_copy(self, provider, file_header_metadata, mock_time):

source_path = WaterButlerPath('/source')
dest_path = WaterButlerPath('/dest')
metadata_url = provider.bucket.new_key(dest_path.path).generate_url(100, 'HEAD')
metadata_url = provider.bucket.new_key('/my-subfolder/' + dest_path.path).generate_url(100, 'HEAD')
aiohttpretty.register_uri('HEAD', metadata_url, headers=file_header_metadata)

header_path = '/' + os.path.join(provider.settings['bucket'], source_path.path)
headers = {'x-amz-copy-source': parse.quote(header_path)}

url = provider.bucket.new_key(dest_path.path).generate_url(100, 'PUT', headers=headers)
url = provider.bucket.new_key('/my-subfolder/' + dest_path.path).generate_url(100, 'PUT', headers=headers)
aiohttpretty.register_uri('PUT', url, status=200)

metadata, exists = await provider.intra_copy(provider, source_path, dest_path)
Expand Down
14 changes: 11 additions & 3 deletions waterbutler/providers/s3/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
from waterbutler.core import metadata


def strip_char(str, chars):
if str.startswith(chars):
return str[len(chars):]
return str


class S3Metadata(metadata.BaseMetadata):

@property
Expand All @@ -24,7 +30,7 @@ def __init__(self, path, headers):

@property
def path(self):
return '/' + self._path
return '/' + strip_char(self._path, self.raw.get('base_folder', ''))

@property
def size(self):
Expand Down Expand Up @@ -62,7 +68,7 @@ class S3FileMetadata(S3Metadata, metadata.BaseFileMetadata):

@property
def path(self):
return '/' + self.raw['Key']
return '/' + strip_char(self.raw['Key'], self.raw.get('base_folder', ''))

@property
def size(self):
Expand Down Expand Up @@ -103,7 +109,7 @@ def name(self):

@property
def path(self):
return '/' + self.raw['Key']
return '/' + strip_char(self.raw['Key'], self.raw.get('base_folder', ''))


class S3FolderMetadata(S3Metadata, metadata.BaseFolderMetadata):
Expand All @@ -114,6 +120,8 @@ def name(self):

@property
def path(self):
if self.raw.get('base_folder', ''):
return '/' + strip_char(self.raw['Prefix'], self.raw.get('base_folder', ''))
return '/' + self.raw['Prefix']


Expand Down
29 changes: 16 additions & 13 deletions waterbutler/providers/s3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,15 @@ def __init__(self, auth, credentials, settings, **kwargs):
self.connection = S3Connection(credentials['access_key'],
credentials['secret_key'], calling_format=OrdinaryCallingFormat())
self.bucket = self.connection.get_bucket(settings['bucket'], validate=False)
self.base_folder = self.settings.get('id', ':/').split(':/')[1]
self.encrypt_uploads = self.settings.get('encrypt_uploads', False)
self.region = None

async def validate_v1_path(self, path, **kwargs):
await self._check_region()

if path == '/':
# adjust path using base folder to include the buckets root, so just the `/` translates to just
# the `/base_folder/` path
base_folder = self.settings.get('id', ':/').split(':/')[1]
return WaterButlerPath(f'/{base_folder}')
# The user selected base folder, the root of the where that user's node is connected.
path = f"/{self.base_folder + path.lstrip('/')}"

implicit_folder = path.endswith('/')

Expand Down Expand Up @@ -101,7 +99,8 @@ async def validate_v1_path(self, path, **kwargs):
return WaterButlerPath(path)

async def validate_path(self, path, **kwargs):
return WaterButlerPath(path)
# The user selected base folder, the root of the where that user's node is connected.
return WaterButlerPath(f"/{self.base_folder + path.lstrip('/')}")

def can_duplicate_names(self):
return True
Expand Down Expand Up @@ -640,9 +639,14 @@ async def metadata(self, path, revision=None, **kwargs):
await self._check_region()

if path.is_dir:
return (await self._metadata_folder(path))
metadata = await self._metadata_folder(path)
for item in metadata:
item.raw['base_folder'] = self.base_folder
else:
metadata = await self._metadata_file(path, revision=revision)
metadata.raw['base_folder'] = self.base_folder

return (await self._metadata_file(path, revision=revision))
return metadata

async def create_folder(self, path, folder_precheck=True, **kwargs):
"""
Expand All @@ -664,7 +668,9 @@ async def create_folder(self, path, folder_precheck=True, **kwargs):
throws=exceptions.CreateFolderError
)

return S3FolderMetadata({'Prefix': path.path})
metadata = S3FolderMetadata({'Prefix': path.path})
metadata.raw['base_folder'] = self.base_folder
return metadata

async def _metadata_file(self, path, revision=None):
await self._check_region()
Expand All @@ -688,10 +694,7 @@ async def _metadata_file(self, path, revision=None):
async def _metadata_folder(self, path):
await self._check_region()

# The user selected base folder, the root of the where that user's node is connected.
prefix = self.settings['id'].split(':/')[1] if path == '/' and self.settings.get('id') else path.path

params = {'prefix': prefix, 'delimiter': '/'}
params = {'prefix': path.path, 'delimiter': '/'}

resp = await self.make_request(
'GET',
Expand Down

0 comments on commit 7683994

Please sign in to comment.