Skip to content

Commit

Permalink
fix: avoid 404 if dataset is deleted while listing tables or views (#106)
Browse files Browse the repository at this point in the history

* fix: avoid 404 if dataset is deleted while listing tables or views

* add unit tests

* remove unnecessary bigquery import
  • Loading branch information
tswast committed Apr 1, 2021
1 parent cc4e23d commit db379d8
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 4 deletions.
16 changes: 12 additions & 4 deletions pybigquery/sqlalchemy_bigquery.py
Expand Up @@ -25,6 +25,7 @@
import operator

from google import auth
import google.api_core.exceptions
from google.cloud.bigquery import dbapi
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import TableReference
Expand Down Expand Up @@ -391,10 +392,17 @@ def _get_table_or_view_names(self, connection, table_type, schema=None):
if current_schema is not None and current_schema != dataset.dataset_id:
continue

tables = client.list_tables(dataset.reference)
for table in tables:
if table_type == table.table_type:
result.append(get_table_name(table))
try:
tables = client.list_tables(dataset.reference)
for table in tables:
if table_type == table.table_type:
result.append(get_table_name(table))
except google.api_core.exceptions.NotFound:
# It's possible that the dataset was deleted between when we
# fetched the list of datasets and when we try to list the
# tables from it. See:
# https://github.com/googleapis/python-bigquery-sqlalchemy/issues/105
pass
return result

@staticmethod
Expand Down
139 changes: 139 additions & 0 deletions tests/unit/test_sqlalchemy_bigquery.py
@@ -0,0 +1,139 @@
# Copyright 2021 The PyBigQuery Authors
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

from unittest import mock

import google.api_core.exceptions
from google.cloud import bigquery
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.table import TableListItem
import pytest
import sqlalchemy


@pytest.fixture
def mock_bigquery_client():
    """Return an autospec'd ``bigquery.Client`` that records calls without I/O."""
    client = mock.create_autospec(bigquery.Client, instance=True)
    return client


@pytest.fixture
def mock_connection(monkeypatch, mock_bigquery_client):
    """Patch the BigQuery dialect so new engines connect via the mocked client."""
    from pybigquery import sqlalchemy_bigquery

    def fake_connect_args(*args, **kwargs):
        # Hand the already-mocked client to the DB-API connect call.
        return ([mock_bigquery_client], {})

    monkeypatch.setattr(
        sqlalchemy_bigquery.BigQueryDialect,
        "create_connect_args",
        fake_connect_args,
    )


@pytest.fixture
def engine_under_test(mock_connection):
    """Return a SQLAlchemy engine wired to the mocked BigQuery client."""
    engine = sqlalchemy.create_engine("bigquery://")
    return engine


@pytest.fixture
def inspector_under_test(engine_under_test):
    """Return a SQLAlchemy ``Inspector`` bound to the mocked engine."""
    from sqlalchemy.engine.reflection import Inspector

    inspector = Inspector.from_engine(engine_under_test)
    return inspector


def dataset_item(dataset_id):
    """Build a ``DatasetListItem`` for *dataset_id* in a fake project."""
    resource = {
        "datasetReference": {
            "projectId": "some-project-id",
            "datasetId": dataset_id,
        }
    }
    return DatasetListItem(resource)


def table_item(dataset_id, table_id, type_="TABLE"):
    """Build a ``TableListItem`` of the given type for ``dataset_id.table_id``."""
    table_ref = {
        "projectId": "some-project-id",
        "datasetId": dataset_id,
        "tableId": table_id,
    }
    return TableListItem({"type": type_, "tableReference": table_ref})


@pytest.mark.parametrize(
    ["datasets_list", "tables_lists", "expected"],
    [
        # No datasets at all.
        ([], [], []),
        # One dataset with no tables.
        ([dataset_item("dataset_1")], [[]], []),
        # Two datasets; VIEW entries must be excluded from table names.
        (
            [dataset_item("dataset_1"), dataset_item("dataset_2")],
            [
                [table_item("dataset_1", "d1t1"), table_item("dataset_1", "d1t2")],
                [
                    table_item("dataset_2", "d2t1"),
                    table_item("dataset_2", "d2view", type_="VIEW"),
                ],
            ],
            ["dataset_1.d1t1", "dataset_1.d1t2", "dataset_2.d2t1"],
        ),
        # A dataset deleted between list_datasets and list_tables raises
        # NotFound (list_tables side_effect); it must be skipped, not 404.
        (
            [dataset_item("dataset_1"), dataset_item("dataset_deleted")],
            [
                [table_item("dataset_1", "d1t1")],
                google.api_core.exceptions.NotFound("dataset_deleted"),
            ],
            ["dataset_1.d1t1"],
        ),
    ],
)
def test_get_table_names(
    engine_under_test, mock_bigquery_client, datasets_list, tables_lists, expected
):
    """table_names() lists TABLE entries and tolerates datasets deleted mid-listing."""
    mock_bigquery_client.list_datasets.return_value = datasets_list
    mock_bigquery_client.list_tables.side_effect = tables_lists
    table_names = engine_under_test.table_names()
    mock_bigquery_client.list_datasets.assert_called_once()
    # list_tables is attempted once per dataset, even for the deleted one.
    assert mock_bigquery_client.list_tables.call_count == len(datasets_list)
    # sorted() already returns a list; the extra list() wrapper was redundant.
    assert sorted(table_names) == sorted(expected)


@pytest.mark.parametrize(
    ["datasets_list", "tables_lists", "expected"],
    [
        # No datasets at all.
        ([], [], []),
        # One dataset with no tables.
        ([dataset_item("dataset_1")], [[]], []),
        # Two datasets; only VIEW entries appear in view names.
        (
            [dataset_item("dataset_1"), dataset_item("dataset_2")],
            [
                [
                    table_item("dataset_1", "d1t1"),
                    table_item("dataset_1", "d1view", type_="VIEW"),
                ],
                [
                    table_item("dataset_2", "d2t1"),
                    table_item("dataset_2", "d2view", type_="VIEW"),
                ],
            ],
            ["dataset_1.d1view", "dataset_2.d2view"],
        ),
        # A dataset deleted between list_datasets and list_tables raises
        # NotFound (list_tables side_effect); it must be skipped, not 404.
        (
            [dataset_item("dataset_1"), dataset_item("dataset_deleted")],
            [
                [table_item("dataset_1", "d1view", type_="VIEW")],
                google.api_core.exceptions.NotFound("dataset_deleted"),
            ],
            ["dataset_1.d1view"],
        ),
    ],
)
def test_get_view_names(
    inspector_under_test, mock_bigquery_client, datasets_list, tables_lists, expected
):
    """get_view_names() lists VIEW entries and tolerates datasets deleted mid-listing."""
    mock_bigquery_client.list_datasets.return_value = datasets_list
    mock_bigquery_client.list_tables.side_effect = tables_lists
    view_names = inspector_under_test.get_view_names()
    mock_bigquery_client.list_datasets.assert_called_once()
    # list_tables is attempted once per dataset, even for the deleted one.
    assert mock_bigquery_client.list_tables.call_count == len(datasets_list)
    # sorted() already returns a list; the extra list() wrapper was redundant.
    assert sorted(view_names) == sorted(expected)

0 comments on commit db379d8

Please sign in to comment.