Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: avoid 404 if dataset is deleted while listing tables or views #106

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 12 additions & 4 deletions pybigquery/sqlalchemy_bigquery.py
Expand Up @@ -25,6 +25,7 @@
import operator

from google import auth
import google.api_core.exceptions
from google.cloud.bigquery import dbapi
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import TableReference
Expand Down Expand Up @@ -391,10 +392,17 @@ def _get_table_or_view_names(self, connection, table_type, schema=None):
if current_schema is not None and current_schema != dataset.dataset_id:
continue

tables = client.list_tables(dataset.reference)
for table in tables:
if table_type == table.table_type:
result.append(get_table_name(table))
try:
tables = client.list_tables(dataset.reference)
for table in tables:
if table_type == table.table_type:
result.append(get_table_name(table))
except google.api_core.exceptions.NotFound:
# It's possible that the dataset was deleted between when we
# fetched the list of datasets and when we try to list the
# tables from it. See:
# https://github.com/googleapis/python-bigquery-sqlalchemy/issues/105
pass
return result

@staticmethod
Expand Down
139 changes: 139 additions & 0 deletions tests/unit/test_sqlalchemy_bigquery.py
@@ -0,0 +1,139 @@
# Copyright 2021 The PyBigQuery Authors
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

from unittest import mock

import google.api_core.exceptions
from google.cloud import bigquery
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.table import TableListItem
import pytest
import sqlalchemy


@pytest.fixture
def mock_bigquery_client():
    """Provide an autospec'd BigQuery client whose calls can be inspected."""
    client = mock.create_autospec(bigquery.Client, instance=True)
    return client


@pytest.fixture
def mock_connection(monkeypatch, mock_bigquery_client):
    """Patch the dialect so connections use the mocked BigQuery client."""
    from pybigquery import sqlalchemy_bigquery

    def fake_connect_args(*args, **kwargs):
        # Hand the pre-built mock client to the DB-API layer instead of
        # letting the dialect construct a real one.
        return ([mock_bigquery_client], {})

    monkeypatch.setattr(
        sqlalchemy_bigquery.BigQueryDialect,
        "create_connect_args",
        fake_connect_args,
    )


@pytest.fixture
def engine_under_test(mock_connection):
    """A SQLAlchemy engine wired to the mocked connection."""
    engine = sqlalchemy.create_engine("bigquery://")
    return engine


@pytest.fixture
def inspector_under_test(engine_under_test):
    """A reflection Inspector bound to the engine under test."""
    from sqlalchemy.engine.reflection import Inspector

    inspector = Inspector.from_engine(engine_under_test)
    return inspector


def dataset_item(dataset_id):
    """Build a minimal DatasetListItem resource for *dataset_id*."""
    resource = {
        "datasetReference": {
            "projectId": "some-project-id",
            "datasetId": dataset_id,
        }
    }
    return DatasetListItem(resource)


def table_item(dataset_id, table_id, type_="TABLE"):
    """Build a minimal TableListItem of the given *type_* ("TABLE"/"VIEW")."""
    resource = {
        "type": type_,
        "tableReference": {
            "projectId": "some-project-id",
            "datasetId": dataset_id,
            "tableId": table_id,
        },
    }
    return TableListItem(resource)


@pytest.mark.parametrize(
    ["datasets_list", "tables_lists", "expected"],
    [
        ([], [], []),
        ([dataset_item("dataset_1")], [[]], []),
        (
            [dataset_item("dataset_1"), dataset_item("dataset_2")],
            [
                [table_item("dataset_1", "d1t1"), table_item("dataset_1", "d1t2")],
                [
                    table_item("dataset_2", "d2t1"),
                    table_item("dataset_2", "d2view", type_="VIEW"),
                ],
            ],
            ["dataset_1.d1t1", "dataset_1.d1t2", "dataset_2.d2t1"],
        ),
        (
            [dataset_item("dataset_1"), dataset_item("dataset_deleted")],
            [
                [table_item("dataset_1", "d1t1")],
                # Simulate a dataset deleted between list_datasets and
                # list_tables (issue #105): the NotFound must be swallowed.
                google.api_core.exceptions.NotFound("dataset_deleted"),
            ],
            ["dataset_1.d1t1"],
        ),
    ],
)
def test_get_table_names(
    engine_under_test, mock_bigquery_client, datasets_list, tables_lists, expected
):
    """table_names() lists only TABLE-type items and skips deleted datasets."""
    mock_bigquery_client.list_datasets.return_value = datasets_list
    mock_bigquery_client.list_tables.side_effect = tables_lists
    table_names = engine_under_test.table_names()
    mock_bigquery_client.list_datasets.assert_called_once()
    # Every dataset must have been probed, even when one raises NotFound.
    assert mock_bigquery_client.list_tables.call_count == len(datasets_list)
    # sorted() already returns a list; no need to wrap it in list().
    assert sorted(table_names) == sorted(expected)


@pytest.mark.parametrize(
    ["datasets_list", "tables_lists", "expected"],
    [
        ([], [], []),
        ([dataset_item("dataset_1")], [[]], []),
        (
            [dataset_item("dataset_1"), dataset_item("dataset_2")],
            [
                [
                    table_item("dataset_1", "d1t1"),
                    table_item("dataset_1", "d1view", type_="VIEW"),
                ],
                [
                    table_item("dataset_2", "d2t1"),
                    table_item("dataset_2", "d2view", type_="VIEW"),
                ],
            ],
            ["dataset_1.d1view", "dataset_2.d2view"],
        ),
        (
            [dataset_item("dataset_1"), dataset_item("dataset_deleted")],
            [
                [table_item("dataset_1", "d1view", type_="VIEW")],
                # Simulate a dataset deleted between list_datasets and
                # list_tables (issue #105): the NotFound must be swallowed.
                google.api_core.exceptions.NotFound("dataset_deleted"),
            ],
            ["dataset_1.d1view"],
        ),
    ],
)
def test_get_view_names(
    inspector_under_test, mock_bigquery_client, datasets_list, tables_lists, expected
):
    """get_view_names() lists only VIEW-type items and skips deleted datasets."""
    mock_bigquery_client.list_datasets.return_value = datasets_list
    mock_bigquery_client.list_tables.side_effect = tables_lists
    view_names = inspector_under_test.get_view_names()
    mock_bigquery_client.list_datasets.assert_called_once()
    # Every dataset must have been probed, even when one raises NotFound.
    assert mock_bigquery_client.list_tables.call_count == len(datasets_list)
    # sorted() already returns a list; no need to wrap it in list().
    assert sorted(view_names) == sorted(expected)