Skip to content

Commit

Permalink
[CIVIS-6409] Change civis_file_to_table to not use table ids to valid…
Browse files Browse the repository at this point in the history
…ate that a table exists (#467)

* [CIVIS-6409] Change civis_file_to_table to not use table ids to validate that a table exists

* add changelog notes, fix vulns

* joblib 1.3 doesn't work

* requests > 2.29.0 installs an incompatible version of urllib3

* problem with urllib3 > 2 for tests, bump patch version

* remove safety, change numpy version for python 3.7

---------

Co-authored-by: mjziolko <mjziolko@civisanalytics.com>
  • Loading branch information
mjziolko and mjziolko committed Jul 13, 2023
1 parent 96a31a7 commit c1c9af5
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 32 deletions.
8 changes: 0 additions & 8 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,6 @@ workflows:
python setup.py sdist bdist_wheel && \
twine check dist/`ls dist/ | grep .tar.gz` && \
twine check dist/`ls dist/ | grep .whl`
- pre-build:
name: safety
command-run: |
pip install -e . && \
pip install --upgrade safety && \
safety --version && \
safety check
- pre-build:
name: bandit
command-run: |
Expand All @@ -98,7 +91,6 @@ workflows:
- flake8
- sphinx-build
- twine
- safety
- bandit
matrix:
parameters:
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
### Fixed
### Security

## 1.16.1 - 2023-07-10
### Changed
- Changed civis_file_to_table to not rely on table ids for determining a table's existence (#464)

## 1.16.0 - 2021-12-14
### Added
- Added documentation around testing code using mocking (#447)
Expand Down
2 changes: 1 addition & 1 deletion civis/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.16.0"
__version__ = "1.16.1"
2 changes: 1 addition & 1 deletion civis/cli/_cli_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def notebooks_download_cmd(notebook_id, path):
"""Download a notebook to a specified local path."""
client = civis.APIClient()
info = client.notebooks.get(notebook_id)
response = requests.get(info['notebook_url'], stream=True)
response = requests.get(info['notebook_url'], stream=True, timeout=60)
response.raise_for_status()
chunk_size = 32 * 1024
chunked = response.iter_content(chunk_size)
Expand Down
7 changes: 4 additions & 3 deletions civis/io/_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _post():
# requests will not stream multipart/form-data, but _single_upload
# is only used for small file objects or non-seekable file objects
# which can't be streamed with using requests-toolbelt anyway
response = requests.post(url, files=form_key)
response = requests.post(url, files=form_key, timeout=60)

if not response.ok:
msg = _get_aws_error_message(response)
Expand Down Expand Up @@ -151,7 +151,8 @@ def _upload_part_base(item, file_path, part_size, file_size):
with open(file_path, 'rb') as fin:
fin.seek(offset)
partial_buf = BufferedPartialReader(fin, num_bytes)
part_response = requests.put(part_url, data=partial_buf)
part_response = requests.put(part_url, data=partial_buf,
timeout=60)

if not part_response.ok:
msg = _get_aws_error_message(part_response)
Expand Down Expand Up @@ -368,7 +369,7 @@ def _download_url_to_buf():
# Reset the buffer in case we had to retry.
buf.seek(buf_orig_position)

response = requests.get(url, stream=True)
response = requests.get(url, stream=True, timeout=60)
response.raise_for_status()
chunked = response.iter_content(CHUNK_SIZE)
for lines in chunked:
Expand Down
14 changes: 9 additions & 5 deletions civis/io/_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from civis import APIClient
from civis._utils import maybe_get_random_name
from civis.base import EmptyResultError, CivisImportError
from civis.base import EmptyResultError, CivisImportError, CivisAPIError
from civis.futures import CivisFuture
from civis.io import civis_to_file, file_to_civis
from civis.utils import run_job
Expand Down Expand Up @@ -347,7 +347,7 @@ def f(x):

data = pd.read_csv(url, **kwargs)
else:
response = requests.get(url, stream=True)
response = requests.get(url, stream=True, timeout=60)
response.raise_for_status()

with io.StringIO() as buf:
Expand Down Expand Up @@ -1063,12 +1063,16 @@ def civis_file_to_table(file_id, database, table, client=None,
)

try:
client.get_table_id(table, database)
client.databases.get_schemas_tables(db_id, schema, table_name)
log.debug('Table {table} already exists - skipping column '
'detection'.format(table=table))
table_exists = True
except ValueError:
except CivisAPIError as e:
table_exists = False
if e.status_code != 404:
warnings.warn("Unexpected error when checking if table %s.%s "
"exists on database %d:\n%s"
% (schema, table_name, db_id, str(e)))

sql_types_provided = False
if table_columns:
Expand Down Expand Up @@ -1184,7 +1188,7 @@ def _decompress_stream(response, buf, write_bytes=True, encoding="utf-8"):


def _download_file(url, local_path, headers, compression):
response = requests.get(url, stream=True)
response = requests.get(url, stream=True, timeout=60)
response.raise_for_status()

# gzipped buffers can be concatenated so write headers as gzip
Expand Down
2 changes: 1 addition & 1 deletion civis/resources/civis_api_spec.json

Large diffs are not rendered by default.

29 changes: 19 additions & 10 deletions civis/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,9 @@ def test_civis_file_to_table_table_exists(self,
self.mock_client.get_database_id.return_value = 42
self.mock_client.default_credential = 713

self.mock_client.get_table_id.return_value = 42
self.mock_client.databases.get_schemas_tables.return_value = Response({
'name': 'table1'
})
m_process_cleaning_results.return_value = (
[mock_cleaned_file_id],
True, # headers
Expand Down Expand Up @@ -320,7 +322,8 @@ def test_civis_file_to_table_table_doesnt_exist(
self.mock_client.get_database_id.return_value = 42
self.mock_client.default_credential = 713

self.mock_client.get_table_id.side_effect = ValueError('no table')
self.mock_client.databases.get_schemas_tables.side_effect =\
MockAPIError(404)
mock_columns = [{'name': 'foo', 'sql_type': 'INTEGER'}]
m_process_cleaning_results.return_value = (
[mock_cleaned_file_id],
Expand Down Expand Up @@ -418,7 +421,8 @@ def test_civis_file_to_table_table_doesnt_exist_all_sql_types_missing(
.id = mock_import_id
self.mock_client.get_database_id.return_value = 42
self.mock_client.default_credential = 713
self.mock_client.get_table_id.side_effect = ValueError('no table')
self.mock_client.databases.get_schemas_tables.side_effect =\
MockAPIError(404)
table_columns = [{'name': 'a', 'sql_type': ''},
{'name': 'b', 'sql_type': ''}]
detected_columns = [{'name': 'a', 'sql_type': 'INTEGER'},
Expand Down Expand Up @@ -519,7 +523,8 @@ def test_civis_file_to_table_table_does_not_exist_some_sql_types_missing(
.id = mock_import_id
self.mock_client.get_database_id.return_value = 42
self.mock_client.default_credential = 713
self.mock_client.get_table_id.side_effect = ValueError('no table')
self.mock_client.databases.get_schemas_tables.side_effect =\
MockAPIError(404)
table_columns = [{'name': 'a', 'sql_type': 'INT'},
{'name': 'b', 'sql_type': ''}]

Expand Down Expand Up @@ -553,7 +558,8 @@ def test_civis_file_to_table_table_columns_keys_misspelled(
.id = mock_import_id
self.mock_client.get_database_id.return_value = 42
self.mock_client.default_credential = 713
self.mock_client.get_table_id.side_effect = ValueError('no table')
self.mock_client.databases.get_schemas_tables.side_effect =\
MockAPIError(404)
table_columns = [{'name': 'a', 'sqlType': 'INT'},
{'name': 'b', 'bad_type': ''}]

Expand Down Expand Up @@ -594,7 +600,8 @@ def run_subtest(mock_file_ids):
self.mock_client.get_database_id.return_value = 42
self.mock_client.default_credential = 713

self.mock_client.get_table_id.side_effect = ValueError('no table')
self.mock_client.databases.get_schemas_tables.side_effect =\
MockAPIError(404)
table_columns = [{'name': 'foo', 'sql_type': 'INTEGER'},
{'name': 'bar', 'sql_type': 'VARCHAR(42)'}]
m_process_cleaning_results.return_value = (
Expand Down Expand Up @@ -707,7 +714,8 @@ def test_civis_file_to_table_multi_file(
self.mock_client.get_database_id.return_value = 42
self.mock_client.default_credential = 713

self.mock_client.get_table_id.side_effect = ValueError('no table')
self.mock_client.databases.get_schemas_tables.side_effect =\
MockAPIError(404)
mock_columns = [{'name': 'foo', 'sql_type': 'INTEGER'}]
m_process_cleaning_results.return_value = (
mock_cleaned_file_ids,
Expand Down Expand Up @@ -1325,7 +1333,7 @@ def test_civis_to_file_local(mock_requests):
assert _fin.read() == 'abcdef'
mock_civis.files.get.assert_called_once_with(137)
mock_requests.get.assert_called_once_with(
mock_civis.files.get.return_value.file_url, stream=True)
mock_civis.files.get.return_value.file_url, stream=True, timeout=60)


@pytest.mark.civis_to_file
Expand Down Expand Up @@ -1366,7 +1374,7 @@ def mock_iter_content(_):
mock_civis.files.get.assert_called_once_with(137)
assert mock_requests.get.call_count == 2
mock_requests.get.assert_called_with(
mock_civis.files.get.return_value.file_url, stream=True)
mock_civis.files.get.return_value.file_url, stream=True, timeout=60)


@pytest.mark.file_to_civis
Expand All @@ -1387,7 +1395,8 @@ def test_file_to_civis(mock_requests, input_filename):
assert fid == expected_id
mock_civis.files.post.assert_called_once_with(civis_name, expires_at=None)
mock_requests.post.assert_called_once_with(
mock_civis.files.post.return_value.upload_url, files=mock.ANY)
mock_civis.files.post.return_value.upload_url, files=mock.ANY,
timeout=60)


@pytest.mark.split_schema_tablename
Expand Down
6 changes: 4 additions & 2 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Testing and linting
pytest==6.2.5
pytest==7.2.1
flake8==4.0.1
pytest-cov==3.0.0
urllib3==1.26.16 # version 2 is incompatible with our vcr version
vcrpy==1.11.1 # higher versions would break tests
vcrpy-unittest==0.1.7
twine==3.6.0
Expand All @@ -13,7 +14,8 @@ sphinx_rtd_theme==1.0.0
numpydoc==1.1.0

# CivisML
numpy==1.21.4
numpy==1.21.4 ; python_version <= '3.7'
numpy==1.22.4 ; python_version >= '3.8'
scikit-learn==1.0.1
scipy==1.7.2
feather-format==0.4.1
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ click>=6.0,<9
jsonref>=0.1,<=0.2.99
requests>=2.12.0,<3
jsonschema>=2.5.1,<5
joblib>=0.11,<2
joblib>=0.11,<1.3
cloudpickle>=0.2,<3
tenacity>=6.2,<9

0 comments on commit c1c9af5

Please sign in to comment.