Skip datasets that disappear while table or view names are being listed.

_get_table_or_view_names in pybigquery/sqlalchemy_bigquery.py now ignores
google.api_core.exceptions.NotFound raised while listing one dataset's tables
and keeps going with the remaining datasets. A new unit-test module exercises
Engine.table_names() and Inspector.get_view_names() against a mocked BigQuery
client, including the case where list_tables raises NotFound for a dataset.

diff --git a/pybigquery/sqlalchemy_bigquery.py b/pybigquery/sqlalchemy_bigquery.py
index e7caddd8..c73adea9 100644
--- a/pybigquery/sqlalchemy_bigquery.py
+++ b/pybigquery/sqlalchemy_bigquery.py
@@ -25,6 +25,7 @@
 import operator
 
 from google import auth
+import google.api_core.exceptions
 from google.cloud.bigquery import dbapi
 from google.cloud.bigquery.schema import SchemaField
 from google.cloud.bigquery.table import TableReference
@@ -391,10 +392,17 @@ def _get_table_or_view_names(self, connection, table_type, schema=None):
             if current_schema is not None and current_schema != dataset.dataset_id:
                 continue
 
-            tables = client.list_tables(dataset.reference)
-            for table in tables:
-                if table_type == table.table_type:
-                    result.append(get_table_name(table))
+            try:
+                tables = client.list_tables(dataset.reference)
+                for table in tables:
+                    if table_type == table.table_type:
+                        result.append(get_table_name(table))
+            except google.api_core.exceptions.NotFound:
+                # It's possible that the dataset was deleted between when we
+                # fetched the list of datasets and when we try to list the
+                # tables from it. See:
+                # https://github.com/googleapis/python-bigquery-sqlalchemy/issues/105
+                pass
         return result
 
     @staticmethod
diff --git a/tests/unit/test_sqlalchemy_bigquery.py b/tests/unit/test_sqlalchemy_bigquery.py
new file mode 100644
index 00000000..dc65d513
--- /dev/null
+++ b/tests/unit/test_sqlalchemy_bigquery.py
@@ -0,0 +1,139 @@
+# Copyright 2021 The PyBigQuery Authors
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+
+from unittest import mock
+
+import google.api_core.exceptions
+from google.cloud import bigquery
+from google.cloud.bigquery.dataset import DatasetListItem
+from google.cloud.bigquery.table import TableListItem
+import pytest
+import sqlalchemy
+
+
+@pytest.fixture
+def mock_bigquery_client():
+    return mock.create_autospec(bigquery.Client, instance=True)
+
+
+@pytest.fixture
+def mock_connection(monkeypatch, mock_bigquery_client):
+    from pybigquery import sqlalchemy_bigquery
+
+    def mock_connect_args(*args, **kwargs):
+        return ([mock_bigquery_client], {})
+
+    monkeypatch.setattr(
+        sqlalchemy_bigquery.BigQueryDialect, "create_connect_args", mock_connect_args
+    )
+
+
+@pytest.fixture
+def engine_under_test(mock_connection):
+    return sqlalchemy.create_engine("bigquery://")
+
+
+@pytest.fixture
+def inspector_under_test(engine_under_test):
+    from sqlalchemy.engine.reflection import Inspector
+
+    return Inspector.from_engine(engine_under_test)
+
+
+def dataset_item(dataset_id):
+    return DatasetListItem(
+        {"datasetReference": {"projectId": "some-project-id", "datasetId": dataset_id}}
+    )
+
+
+def table_item(dataset_id, table_id, type_="TABLE"):
+    return TableListItem(
+        {
+            "type": type_,
+            "tableReference": {
+                "projectId": "some-project-id",
+                "datasetId": dataset_id,
+                "tableId": table_id,
+            },
+        }
+    )
+
+
+@pytest.mark.parametrize(
+    ["datasets_list", "tables_lists", "expected"],
+    [
+        ([], [], []),
+        ([dataset_item("dataset_1")], [[]], []),
+        (
+            [dataset_item("dataset_1"), dataset_item("dataset_2")],
+            [
+                [table_item("dataset_1", "d1t1"), table_item("dataset_1", "d1t2")],
+                [
+                    table_item("dataset_2", "d2t1"),
+                    table_item("dataset_2", "d2view", type_="VIEW"),
+                ],
+            ],
+            ["dataset_1.d1t1", "dataset_1.d1t2", "dataset_2.d2t1"],
+        ),
+        (
+            [dataset_item("dataset_1"), dataset_item("dataset_deleted")],
+            [
+                [table_item("dataset_1", "d1t1")],
+                google.api_core.exceptions.NotFound("dataset_deleted"),
+            ],
+            ["dataset_1.d1t1"],
+        ),
+    ],
+)
+def test_get_table_names(
+    engine_under_test, mock_bigquery_client, datasets_list, tables_lists, expected
+):
+    mock_bigquery_client.list_datasets.return_value = datasets_list
+    mock_bigquery_client.list_tables.side_effect = tables_lists
+    table_names = engine_under_test.table_names()
+    mock_bigquery_client.list_datasets.assert_called_once()
+    assert mock_bigquery_client.list_tables.call_count == len(datasets_list)
+    assert list(sorted(table_names)) == list(sorted(expected))
+
+
+@pytest.mark.parametrize(
+    ["datasets_list", "tables_lists", "expected"],
+    [
+        ([], [], []),
+        ([dataset_item("dataset_1")], [[]], []),
+        (
+            [dataset_item("dataset_1"), dataset_item("dataset_2")],
+            [
+                [
+                    table_item("dataset_1", "d1t1"),
+                    table_item("dataset_1", "d1view", type_="VIEW"),
+                ],
+                [
+                    table_item("dataset_2", "d2t1"),
+                    table_item("dataset_2", "d2view", type_="VIEW"),
+                ],
+            ],
+            ["dataset_1.d1view", "dataset_2.d2view"],
+        ),
+        (
+            [dataset_item("dataset_1"), dataset_item("dataset_deleted")],
+            [
+                [table_item("dataset_1", "d1view", type_="VIEW")],
+                google.api_core.exceptions.NotFound("dataset_deleted"),
+            ],
+            ["dataset_1.d1view"],
+        ),
+    ],
+)
+def test_get_view_names(
+    inspector_under_test, mock_bigquery_client, datasets_list, tables_lists, expected
+):
+    mock_bigquery_client.list_datasets.return_value = datasets_list
+    mock_bigquery_client.list_tables.side_effect = tables_lists
+    view_names = inspector_under_test.get_view_names()
+    mock_bigquery_client.list_datasets.assert_called_once()
+    assert mock_bigquery_client.list_tables.call_count == len(datasets_list)
+    assert list(sorted(view_names)) == list(sorted(expected))