Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add minimum timeout to getQueryResults API requests #444

Merged
merged 7 commits into from Jan 8, 2021
Merged
14 changes: 14 additions & 0 deletions google/cloud/bigquery/client.py
Expand Up @@ -94,6 +94,14 @@
)
_LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows"

# In microbenchmarks, it's been shown that even in ideal conditions (query
# finished, local data), requests to getQueryResults can take 10+ seconds.
# In less-than-ideal situations, the response can take even longer, as it must
# be able to download a full 100+ MB row in that time. Don't let the
# connection timeout before data can be downloaded.
# https://github.com/googleapis/python-bigquery/issues/438
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120


class Project(object):
"""Wrapper for resource describing a BigQuery project.
Expand Down Expand Up @@ -1580,6 +1588,9 @@ def _get_query_results(

extra_params = {"maxResults": 0}

if timeout is not None:
timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)

if project is None:
project = self.project

Expand Down Expand Up @@ -3307,6 +3318,9 @@ def _list_rows_from_query_results(
"location": location,
}

if timeout is not None:
timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we document this in the methods? e.g. timeout less than the _MIN_GET_QUERY_RESULTS_TIMEOUT are ignored?


if start_index is not None:
params["startIndex"] = start_index

Expand Down
6 changes: 6 additions & 0 deletions tests/unit/job/test_query.py
Expand Up @@ -1046,6 +1046,8 @@ def test_result_invokes_begins(self):
self.assertEqual(reload_request[1]["method"], "GET")

def test_result_w_timeout(self):
import google.cloud.bigquery.client

begun_resource = self._make_resource()
query_resource = {
"jobComplete": True,
Expand All @@ -1072,6 +1074,10 @@ def test_result_w_timeout(self):
"/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID),
)
self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900)
self.assertEqual(
query_request[1]["timeout"],
google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT,
)
self.assertEqual(reload_request[1]["method"], "GET")

def test_result_w_page_size(self):
Expand Down
29 changes: 27 additions & 2 deletions tests/unit/test_client.py
Expand Up @@ -311,7 +311,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self):
project="other-project",
location=self.LOCATION,
timeout_ms=500,
timeout=42,
timeout=420,
)

final_attributes.assert_called_once_with({"path": path}, client, None)
Expand All @@ -320,7 +320,32 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self):
method="GET",
path=path,
query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION},
timeout=42,
timeout=420,
)

def test__get_query_results_miss_w_short_timeout(self):
import google.cloud.bigquery.client
from google.cloud.exceptions import NotFound

creds = _make_credentials()
client = self._make_one(self.PROJECT, creds)
conn = client._connection = make_connection()
path = "/projects/other-project/queries/nothere"
with self.assertRaises(NotFound):
client._get_query_results(
"nothere",
None,
project="other-project",
location=self.LOCATION,
timeout_ms=500,
timeout=1,
)

conn.api_request.assert_called_once_with(
method="GET",
path=path,
query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION},
timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT,
)

def test__get_query_results_miss_w_client_location(self):
Expand Down