bug: add missing latency check (#763)
* specify a particular version of bigquery to debug

* again tweaking the versions to debug the issue.

* add some pip freeze commands for debugging

* updates the minimum latency to correct a flaky bot issue and protect users

* Update noxfile.py

* Update noxfile.py

* Update setup.py

* Update noxfile.py

* add several test cases to test validation logic

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
chalmerlowe and gcf-owl-bot[bot] committed May 8, 2024
1 parent aaa7090 commit 6bfd1ee
Showing 3 changed files with 50 additions and 3 deletions.
8 changes: 7 additions & 1 deletion noxfile.py
@@ -256,6 +256,7 @@ def system(session):

    install_systemtest_dependencies(session, "-c", constraints_path)

    # Print out package versions.
    session.run("python", "-m", "pip", "freeze")

    # Run py.test against the system tests.
@@ -352,12 +353,15 @@ def prerelease(session):
"--quiet",
f"--junitxml=prerelease_unit_{session.python}_sponge_log.xml",
os.path.join("tests", "unit"),
*session.posargs,
)

session.run(
"py.test",
"--quiet",
f"--junitxml=prerelease_system_{session.python}_sponge_log.xml",
os.path.join("tests", "system"),
*session.posargs,
)


@@ -515,7 +519,9 @@ def prerelease_deps(session):
    session.install(*other_deps)
    session.run("python", "-m", "pip", "freeze")

    # Print out prerelease package versions
    # Print out package versions.
    session.run("python", "-m", "pip", "freeze")

    session.run(
        "python", "-c", "import google.protobuf; print(google.protobuf.__version__)"
    )
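For context on the *session.posargs arguments added above: nox forwards everything that follows "--" on the command line to the session as session.posargs, so extra pytest options can be passed straight through to these runs. A minimal sketch, with a hypothetical session name that is not part of this noxfile:

import nox


@nox.session(python="3.11")
def example(session):
    # Running "nox -s example -- -k timeout" makes session.posargs == ["-k", "timeout"],
    # and those arguments are handed to pytest below.
    session.install("pytest")
    session.run("py.test", "--quiet", *session.posargs)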
15 changes: 14 additions & 1 deletion pandas_gbq/gbq.py
@@ -411,7 +411,20 @@ def run_query(self, query, max_results=None, progress_bar_type=None, **kwargs):
        timeout_ms = job_config_dict.get("jobTimeoutMs") or job_config_dict[
            "query"
        ].get("timeoutMs")
        timeout_ms = int(timeout_ms) if timeout_ms else None

        if timeout_ms:
            timeout_ms = int(timeout_ms)
            # Having too small a timeout_ms results in individual
            # API calls timing out before they can finish.
            # ~300 milliseconds is a rule of thumb for the bare minimum
            # latency from the BigQuery API.
            minimum_latency = 400
            if timeout_ms < minimum_latency:
                raise QueryTimeout(
                    f"Query timeout must be at least 400 milliseconds: timeout_ms equals {timeout_ms}."
                )
        else:
            timeout_ms = None

        self._start_timer()
        job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)
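To make the new check easy to follow in isolation, here is a minimal, self-contained sketch of the validation added to run_query above. The helper validate_timeout_ms, the constant MINIMUM_LATENCY_MS, and the local QueryTimeout class are hypothetical stand-ins rather than the names used in pandas_gbq.gbq; the 400 millisecond floor and the error message are taken from the diff.

class QueryTimeout(Exception):
    """Stand-in for pandas_gbq.gbq.QueryTimeout, used only in this sketch."""


MINIMUM_LATENCY_MS = 400  # hypothetical constant name; the 400 ms floor comes from the diff


def validate_timeout_ms(job_config_dict):
    # Pull the timeout from the job-level jobTimeoutMs or, failing that,
    # the query-level timeoutMs, mirroring the lookup in run_query above.
    timeout_ms = job_config_dict.get("jobTimeoutMs") or job_config_dict.get(
        "query", {}
    ).get("timeoutMs")
    if not timeout_ms:
        return None
    timeout_ms = int(timeout_ms)
    if timeout_ms < MINIMUM_LATENCY_MS:
        raise QueryTimeout(
            f"Query timeout must be at least 400 milliseconds: timeout_ms equals {timeout_ms}."
        )
    return timeout_ms


# A 1 ms timeout is rejected before any API call would be made.
try:
    validate_timeout_ms({"query": {"useQueryCache": False, "timeoutMs": 1}})
except QueryTimeout as exc:
    print(exc)

# 500 ms clears the floor and is returned as an int.
print(validate_timeout_ms({"jobTimeoutMs": "500"}))  # -> 500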
30 changes: 29 additions & 1 deletion tests/system/test_gbq.py
@@ -473,13 +473,41 @@ def test_timeout_configuration(self, project_id):
sql_statement = """
select count(*) from unnest(generate_array(1,1000000)), unnest(generate_array(1, 10000))
"""

# This first test confirms that we get a timeout error if we exceed the timeout limit.
# The above query is expected to take a long time and exceed the limit.
configs = [
# we have a minimum limit on the timeout_ms being 400 milliseconds
# see pandas-gbq/gbq.py/GbqConnector/run_query docstring
# for more details.
# pandas-gbq timeout configuration. Transformed to REST API compatible version.
{"query": {"useQueryCache": False, "timeoutMs": 1}},
{"query": {"useQueryCache": False, "timeoutMs": 401}},
# REST API job timeout. See:
# https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms
{"query": {"useQueryCache": False}, "jobTimeoutMs": 401},
]
for config in configs:
with pytest.raises(gbq.QueryTimeout):
gbq.read_gbq(
sql_statement,
project_id=project_id,
credentials=self.credentials,
configuration=config,
)

# This second test confirms out our validation logic won't allow a
# value less than or equal to 400 be used as a timeout value.
# by exercising the system for various edge cases to ensure we catch
# invalid values less than or equal to 400.
configs = [
{"query": {"useQueryCache": False, "timeoutMs": 399}},
{"query": {"useQueryCache": False, "timeoutMs": 400}},
{"query": {"useQueryCache": False, "timeoutMs": 1}},
{"query": {"useQueryCache": False}, "jobTimeoutMs": 399},
{"query": {"useQueryCache": False}, "jobTimeoutMs": 400},
{"query": {"useQueryCache": False}, "jobTimeoutMs": 1},
]

for config in configs:
with pytest.raises(gbq.QueryTimeout):
gbq.read_gbq(
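As a hedged illustration of how the new check surfaces to users of the library (the project id and query below are placeholders, and the call assumes valid default credentials), a configuration with a timeout below the 400 millisecond floor now raises QueryTimeout before the query is submitted:

import pandas_gbq
from pandas_gbq import gbq

# Placeholder project and trivial query; timeout is below the 400 ms floor.
config = {"query": {"useQueryCache": False, "timeoutMs": 399}}
try:
    pandas_gbq.read_gbq(
        "SELECT 1",
        project_id="my-project",
        configuration=config,
    )
except gbq.QueryTimeout as exc:
    print(exc)  # Query timeout must be at least 400 milliseconds: timeout_ms equals 399.

# A value at or above the floor (for example 5000 ms) passes validation and the
# query is submitted to BigQuery as usual.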
