diff --git a/README.rst b/README.rst
index 1b3dc36f..fb3a8fb1 100644
--- a/README.rst
+++ b/README.rst
@@ -148,6 +148,15 @@ By default, ``arraysize`` is set to ``5000``. ``arraysize`` is used to set the b
engine = create_engine('bigquery://project', arraysize=1000)
+Page size for dataset.list_tables
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+By default, ``list_tables_page_size`` is set to ``1000``. ``list_tables_page_size`` is used to set ``max_results`` for the `dataset.list_tables`_ operation. To change it, pass ``list_tables_page_size`` to ``create_engine()``:
+
+.. _`dataset.list_tables`: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
+.. code-block:: python
+
+ engine = create_engine('bigquery://project', list_tables_page_size=100)
Adding a Default Dataset
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -180,7 +189,7 @@ Connection String Parameters
There are many situations where you can't call ``create_engine`` directly, such as when using tools like `Flask SQLAlchemy `_. For situations like these, or for situations where you want the ``Client`` to have a `default_query_job_config `_, you can pass many arguments in the query of the connection string.
-The ``credentials_path``, ``credentials_info``, ``location``, and ``arraysize`` parameters are used by this library, and the rest are used to create a `QueryJobConfig `_
+The ``credentials_path``, ``credentials_info``, ``location``, ``arraysize``, and ``list_tables_page_size`` parameters are used by this library, and the rest are used to create a `QueryJobConfig `_
Note that if you want to use query strings, it will be more reliable if you use three slashes, so ``'bigquery:///?a=b'`` will work reliably, but ``'bigquery://?a=b'`` might be interpreted as having a "database" of ``?a=b``, depending on the system being used to parse the connection string.
@@ -193,6 +202,7 @@ Here are examples of all the supported arguments. Any not present are either for
'credentials_path=/some/path/to.json' '&'
'location=some-location' '&'
'arraysize=1000' '&'
+ 'list_tables_page_size=100' '&'
'clustering_fields=a,b,c' '&'
'create_disposition=CREATE_IF_NEEDED' '&'
'destination=different-project.different-dataset.table' '&'
diff --git a/pybigquery/parse_url.py b/pybigquery/parse_url.py
index 13dda364..370a46c7 100644
--- a/pybigquery/parse_url.py
+++ b/pybigquery/parse_url.py
@@ -68,6 +68,7 @@ def parse_url(url): # noqa: C901
dataset_id = url.database or None
arraysize = None
credentials_path = None
+ list_tables_page_size = None
# location
if "location" in query:
@@ -85,6 +86,16 @@ def parse_url(url): # noqa: C901
except ValueError:
raise ValueError("invalid int in url query arraysize: " + str_arraysize)
+ if "list_tables_page_size" in query:
+ str_list_tables_page_size = query.pop("list_tables_page_size")
+ try:
+ list_tables_page_size = int(str_list_tables_page_size)
+ except ValueError:
+ raise ValueError(
+ "invalid int in url query list_tables_page_size: "
+ + str_list_tables_page_size
+ )
+
# if only these "non-config" values were present, the dict will now be empty
if not query:
# if a dataset_id exists, we need to return a job_config that isn't None
@@ -97,9 +108,18 @@ def parse_url(url): # noqa: C901
arraysize,
credentials_path,
QueryJobConfig(),
+ list_tables_page_size,
)
else:
- return project_id, location, dataset_id, arraysize, credentials_path, None
+ return (
+ project_id,
+ location,
+ dataset_id,
+ arraysize,
+ credentials_path,
+ None,
+ list_tables_page_size,
+ )
job_config = QueryJobConfig()
@@ -239,4 +259,12 @@ def parse_url(url): # noqa: C901
"invalid write_disposition in url query: " + query["write_disposition"]
)
- return project_id, location, dataset_id, arraysize, credentials_path, job_config
+ return (
+ project_id,
+ location,
+ dataset_id,
+ arraysize,
+ credentials_path,
+ job_config,
+ list_tables_page_size,
+ )
diff --git a/pybigquery/sqlalchemy_bigquery.py b/pybigquery/sqlalchemy_bigquery.py
index 60b8aab0..795f7c33 100644
--- a/pybigquery/sqlalchemy_bigquery.py
+++ b/pybigquery/sqlalchemy_bigquery.py
@@ -657,6 +657,7 @@ def __init__(
credentials_path=None,
location=None,
credentials_info=None,
+ list_tables_page_size=1000,
*args,
**kwargs,
):
@@ -666,6 +667,7 @@ def __init__(
self.credentials_info = credentials_info
self.location = location
self.dataset_id = None
+ self.list_tables_page_size = list_tables_page_size
@classmethod
def dbapi(cls):
@@ -694,9 +696,11 @@ def create_connect_args(self, url):
arraysize,
credentials_path,
default_query_job_config,
+ list_tables_page_size,
) = parse_url(url)
- self.arraysize = self.arraysize or arraysize
+ self.arraysize = arraysize or self.arraysize
+ self.list_tables_page_size = list_tables_page_size or self.list_tables_page_size
self.location = location or self.location
self.credentials_path = credentials_path or self.credentials_path
self.dataset_id = dataset_id
@@ -737,7 +741,9 @@ def _get_table_or_view_names(self, connection, table_type, schema=None):
continue
try:
- tables = client.list_tables(dataset.reference)
+ tables = client.list_tables(
+ dataset.reference, page_size=self.list_tables_page_size
+ )
for table in tables:
if table_type == table.table_type:
result.append(get_table_name(table))
diff --git a/setup.py b/setup.py
index 65f121ce..3e16f7d8 100644
--- a/setup.py
+++ b/setup.py
@@ -65,9 +65,9 @@ def readme():
],
platforms="Posix; MacOS X; Windows",
install_requires=[
- "google-api-core>=1.23.0", # Work-around bug in cloud core deps.
- "google-auth>=1.24.0,<2.0dev", # Work around pip wack.
- "google-cloud-bigquery>=2.17.0",
+ "google-api-core>=1.30.0", # Work-around bug in cloud core deps.
+ "google-auth>=1.25.0,<2.0dev", # Work around pip wack.
+ "google-cloud-bigquery>=2.19.0",
"sqlalchemy>=1.2.0,<1.5.0dev",
"future",
],
diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt
index 03281e21..1785edd0 100644
--- a/testing/constraints-3.6.txt
+++ b/testing/constraints-3.6.txt
@@ -5,6 +5,6 @@
#
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
sqlalchemy==1.2.0
-google-auth==1.24.0
-google-cloud-bigquery==2.17.0
-google-api-core==1.23.0
+google-auth==1.25.0
+google-cloud-bigquery==2.19.0
+google-api-core==1.30.0
diff --git a/tests/unit/fauxdbi.py b/tests/unit/fauxdbi.py
index de835753..23d832f1 100644
--- a/tests/unit/fauxdbi.py
+++ b/tests/unit/fauxdbi.py
@@ -462,7 +462,7 @@ def list_datasets(self):
google.cloud.bigquery.Dataset("myproject.yourdataset"),
]
- def list_tables(self, dataset):
+ def list_tables(self, dataset, page_size):
with contextlib.closing(self.connection.connection.cursor()) as cursor:
cursor.execute("select * from sqlite_master")
return [
diff --git a/tests/unit/test_engine.py b/tests/unit/test_engine.py
index ad34ca08..8d8d75b9 100644
--- a/tests/unit/test_engine.py
+++ b/tests/unit/test_engine.py
@@ -52,3 +52,15 @@ def test_set_arraysize(faux_conn, metadata):
# Because we gave a false array size, the array size wasn't set on the cursor:
assert conn.connection.test_data["arraysize"] == 42
+
+
+def test_arraysize_querystring_takes_precedence_over_default(faux_conn, metadata):
+ arraysize = 42
+ engine = sqlalchemy.create_engine(
+ f"bigquery://myproject/mydataset?arraysize={arraysize}"
+ )
+ sqlalchemy.Table("t", metadata, sqlalchemy.Column("c", sqlalchemy.Integer))
+ conn = engine.connect()
+ metadata.create_all(engine)
+
+ assert conn.connection.test_data["arraysize"] == arraysize
diff --git a/tests/unit/test_parse_url.py b/tests/unit/test_parse_url.py
index 3da0546d..a3b5c2fb 100644
--- a/tests/unit/test_parse_url.py
+++ b/tests/unit/test_parse_url.py
@@ -50,6 +50,7 @@ def url_with_everything():
"?credentials_path=/some/path/to.json"
"&location=some-location"
"&arraysize=1000"
+ "&list_tables_page_size=5000"
"&clustering_fields=a,b,c"
"&create_disposition=CREATE_IF_NEEDED"
"&destination=different-project.different-dataset.table"
@@ -72,12 +73,14 @@ def test_basic(url_with_everything):
arraysize,
credentials_path,
job_config,
+ list_tables_page_size,
) = parse_url(url_with_everything)
assert project_id == "some-project"
assert location == "some-location"
assert dataset_id == "some-dataset"
assert arraysize == 1000
+ assert list_tables_page_size == 5000
assert credentials_path == "/some/path/to.json"
assert isinstance(job_config, QueryJobConfig)
@@ -136,6 +139,7 @@ def test_all_values(url_with_everything, param, value, default):
"param, value",
[
("arraysize", "not-int"),
+ ("list_tables_page_size", "not-int"),
("create_disposition", "not-attribute"),
("destination", "not.fully-qualified"),
("dry_run", "not-bool"),
@@ -167,7 +171,15 @@ def test_empty_with_non_config():
"bigquery:///?location=some-location&arraysize=1000&credentials_path=/some/path/to.json"
)
)
- project_id, location, dataset_id, arraysize, credentials_path, job_config = url
+ (
+ project_id,
+ location,
+ dataset_id,
+ arraysize,
+ credentials_path,
+ job_config,
+ list_tables_page_size,
+ ) = url
assert project_id is None
assert location == "some-location"
@@ -175,17 +187,27 @@ def test_empty_with_non_config():
assert arraysize == 1000
assert credentials_path == "/some/path/to.json"
assert job_config is None
+ assert list_tables_page_size is None
def test_only_dataset():
url = parse_url(make_url("bigquery:///some-dataset"))
- project_id, location, dataset_id, arraysize, credentials_path, job_config = url
+ (
+ project_id,
+ location,
+ dataset_id,
+ arraysize,
+ credentials_path,
+ job_config,
+ list_tables_page_size,
+ ) = url
assert project_id is None
assert location is None
assert dataset_id == "some-dataset"
assert arraysize is None
assert credentials_path is None
+ assert list_tables_page_size is None
assert isinstance(job_config, QueryJobConfig)
# we can't actually test that the dataset is on the job_config,
# since we take care of that afterwards, when we have a client to fill in the project