diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index 649877dc4..c07f148f0 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -1,3 +1,3 @@
docker:
image: gcr.io/repo-automation-bots/owlbot-python:latest
- digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc
+ digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d
diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
index 8634a3043..6572e5982 100644
--- a/.github/sync-repo-settings.yaml
+++ b/.github/sync-repo-settings.yaml
@@ -1,9 +1,9 @@
-# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings
-# Rules for master branch protection
+# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings
+# Rules for main branch protection
branchProtectionRules:
# Identifies the protection rule pattern. Name of the branch to be protected.
-# Defaults to `master`
-- pattern: master
+# Defaults to `main`
+- pattern: main
requiresCodeOwnerReviews: true
requiresStrictStatusChecks: true
requiredStatusCheckContexts:
diff --git a/.kokoro/build.sh b/.kokoro/build.sh
index 302cc1e1a..4d6a1d0f6 100755
--- a/.kokoro/build.sh
+++ b/.kokoro/build.sh
@@ -41,7 +41,7 @@ python3 -m pip install --upgrade --quiet nox
python3 -m nox --version
# If this is a continuous build, send the test log to the FlakyBot.
-# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot.
+# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot.
if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then
cleanup() {
chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot
diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh
index 311a8d54b..8a324c9c7 100755
--- a/.kokoro/test-samples-impl.sh
+++ b/.kokoro/test-samples-impl.sh
@@ -80,7 +80,7 @@ for file in samples/**/requirements.txt; do
EXIT=$?
# If this is a periodic build, send the test log to the FlakyBot.
- # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot.
+ # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot.
if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then
chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot
$KOKORO_GFILE_DIR/linux_amd64/flakybot
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 83b409015..5a3cb6bee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,53 @@
[1]: https://pypi.org/project/google-cloud-bigquery/#history
+## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01)
+
+
+### Features
+
+* set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93))
+
+
+### Bug Fixes
+
+* guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a))
+
+### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31)
+
+
+### Bug Fixes
+
+* error inserting DataFrame with REPEATED field ([#925](https://www.github.com/googleapis/python-bigquery/issues/925)) ([656d2fa](https://www.github.com/googleapis/python-bigquery/commit/656d2fa6f870573a21235c83463752a2d084caba))
+* underscores weren't allowed in struct field names when passing parameters to the DB API ([#930](https://www.github.com/googleapis/python-bigquery/issues/930)) ([fcb0bc6](https://www.github.com/googleapis/python-bigquery/commit/fcb0bc68c972c2c98bb8542f54e9228308177ecb))
+
+
+### Documentation
+
+* update docstring for bigquery_create_routine sample ([#883](https://www.github.com/googleapis/python-bigquery/issues/883)) ([#917](https://www.github.com/googleapis/python-bigquery/issues/917)) ([e2d12b7](https://www.github.com/googleapis/python-bigquery/commit/e2d12b795ef2dc51b0ee36f1b3000edb1e64ce05))
+
+### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25)
+
+
+### Bug Fixes
+
+* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d))
+* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627))
+
+## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24)
+
+
+### Features
+
+* Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13))
+
+### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13)
+
+
+### Bug Fixes
+
+* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
+
## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11)
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 2faf5aed3..8aecf9dd2 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -50,9 +50,9 @@ You'll have to create a development environment using a Git checkout:
# Configure remotes such that you can pull changes from the googleapis/python-bigquery
# repository into your local repository.
$ git remote add upstream git@github.com:googleapis/python-bigquery.git
- # fetch and merge changes from upstream into master
+ # fetch and merge changes from upstream into main
$ git fetch upstream
- $ git merge upstream/master
+ $ git merge upstream/main
Now your local repo is set up such that you will push changes to your GitHub
repo, from which you can submit a pull request.
@@ -110,12 +110,12 @@ Coding Style
variables::
export GOOGLE_CLOUD_TESTING_REMOTE="upstream"
- export GOOGLE_CLOUD_TESTING_BRANCH="master"
+ export GOOGLE_CLOUD_TESTING_BRANCH="main"
By doing this, you are specifying the location of the most up-to-date
- version of ``python-bigquery``. The the suggested remote name ``upstream``
- should point to the official ``googleapis`` checkout and the
- the branch should be the main branch on that remote (``master``).
+ version of ``python-bigquery``. The
+ remote name ``upstream`` should point to the official ``googleapis``
+ checkout and the branch should be the default branch on that remote (``main``).
- This repository contains configuration for the
`pre-commit <https://pre-commit.com/>`__ tool, which automates checking
@@ -209,7 +209,7 @@ The `description on PyPI`_ for the project comes directly from the
``README``. Due to the reStructuredText (``rst``) parser used by
PyPI, relative links which will work on GitHub (e.g. ``CONTRIBUTING.rst``
instead of
-``https://github.com/googleapis/python-bigquery/blob/master/CONTRIBUTING.rst``)
+``https://github.com/googleapis/python-bigquery/blob/main/CONTRIBUTING.rst``)
may cause problems creating links or rendering the description.
.. _description on PyPI: https://pypi.org/project/google-cloud-bigquery
@@ -234,7 +234,7 @@ We support:
Supported versions can be found in our ``noxfile.py`` `config`_.
-.. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py
+.. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py
We also explicitly decided to support Python 3 beginning with version 3.6.
diff --git a/docs/conf.py b/docs/conf.py
index 09f7ea414..07e5d8c30 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -76,8 +76,8 @@
# The encoding of source files.
# source_encoding = 'utf-8-sig'
-# The master toctree document.
-master_doc = "index"
+# The root toctree document.
+root_doc = "index"
# General information about the project.
project = "google-cloud-bigquery"
@@ -281,7 +281,7 @@
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(
- master_doc,
+ root_doc,
"google-cloud-bigquery.tex",
"google-cloud-bigquery Documentation",
author,
@@ -316,7 +316,7 @@
# (source start file, name, description, authors, manual section).
man_pages = [
(
- master_doc,
+ root_doc,
"google-cloud-bigquery",
"google-cloud-bigquery Documentation",
[author],
@@ -335,7 +335,7 @@
# dir menu entry, description, category)
texinfo_documents = [
(
- master_doc,
+ root_doc,
"google-cloud-bigquery",
"google-cloud-bigquery Documentation",
author,
@@ -366,6 +366,8 @@
"grpc": ("https://grpc.github.io/grpc/python/", None),
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
"protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
+ "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None),
+ "geopandas": ("https://geopandas.org/", None),
}
diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst
index 40732a298..109259711 100644
--- a/docs/usage/pandas.rst
+++ b/docs/usage/pandas.rst
@@ -58,6 +58,20 @@ The following data types are used when creating a pandas DataFrame.
- Int64
-
+Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame
+------------------------------------------------------------
+
+`GeoPandas <https://geopandas.org/>`_ adds geospatial analytics
+capabilities to Pandas. To retrieve query results containing
+GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`:
+
+.. literalinclude:: ../samples/geography/to_geodataframe.py
+ :language: python
+ :dedent: 4
+ :start-after: [START bigquery_query_results_geodataframe]
+ :end-before: [END bigquery_query_results_geodataframe]
+
+
Load a Pandas DataFrame to a BigQuery Table
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
index cb2ce40a3..e7f5bd59b 100644
--- a/google/cloud/bigquery/_helpers.py
+++ b/google/cloud/bigquery/_helpers.py
@@ -74,7 +74,31 @@ def is_read_session_optional(self) -> bool:
return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION
+class PyarrowVersions:
+ """Version comparisons for pyarrow package."""
+
+ def __init__(self):
+ self._installed_version = None
+
+ @property
+ def installed_version(self) -> packaging.version.Version:
+ """Return the parsed version of pyarrow."""
+ if self._installed_version is None:
+ import pyarrow
+
+ self._installed_version = packaging.version.parse(
+ # Use 0.0.0, since it is earlier than any released version.
+ # Legacy versions also have the same property, but
+ # creating a LegacyVersion has been deprecated.
+ # https://github.com/pypa/packaging/issues/321
+ getattr(pyarrow, "__version__", "0.0.0")
+ )
+
+ return self._installed_version
+
+
BQ_STORAGE_VERSIONS = BQStorageVersions()
+PYARROW_VERSIONS = PyarrowVersions()
def _not_null(value, field):
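
The new `PyarrowVersions` helper defers `import pyarrow` until the version is actually read, so merely importing `_helpers` never fails on a missing or broken pyarrow install. A minimal sketch of a guarded feature check using the `PYARROW_VERSIONS` singleton (the 3.0.0 threshold is an illustrative assumption, not a constant from this change):

    import packaging.version

    from google.cloud.bigquery._helpers import PYARROW_VERSIONS

    _MIN_PYARROW = packaging.version.parse("3.0.0")  # hypothetical threshold

    # pyarrow is imported lazily on first property access; an install
    # without a __version__ attribute parses as "0.0.0" and fails the
    # comparison cleanly instead of raising.
    if PYARROW_VERSIONS.installed_version >= _MIN_PYARROW:
        print("pyarrow is new enough")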
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index 88759bd18..a627f5226 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -25,10 +25,40 @@
import pandas
except ImportError: # pragma: NO COVER
pandas = None
+else:
+ import numpy
import pyarrow
import pyarrow.parquet
+try:
+ # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
+ from shapely.geometry.base import BaseGeometry as _BaseGeometry
+except ImportError: # pragma: NO COVER
+ # No shapely, use NoneType for _BaseGeometry as a placeholder.
+ _BaseGeometry = type(None)
+else:
+ if pandas is not None: # pragma: NO COVER
+
+ def _to_wkb():
+ # Create a closure that:
+ # - Adds a not-null check. This allows the returned function to
+ # be used directly with apply, unlike `shapely.wkb.dumps`.
+ # - Avoids extra work done by `shapely.wkb.dumps` that we don't need.
+ # - Caches the WKBWriter (and write method lookup :) )
+ # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace.
+ from shapely.geos import WKBWriter, lgeos
+
+ write = WKBWriter(lgeos).write
+ notnull = pandas.notnull
+
+ def _to_wkb(v):
+ return write(v) if notnull(v) else v
+
+ return _to_wkb
+
+ _to_wkb = _to_wkb()
+
try:
from google.cloud.bigquery_storage import ArrowSerializationOptions
except ImportError:
@@ -71,6 +101,7 @@
"uint8": "INTEGER",
"uint16": "INTEGER",
"uint32": "INTEGER",
+ "geometry": "GEOGRAPHY",
}
@@ -191,14 +222,16 @@ def bq_to_arrow_data_type(field):
return data_type_constructor()
-def bq_to_arrow_field(bq_field):
+def bq_to_arrow_field(bq_field, array_type=None):
"""Return the Arrow field, corresponding to a given BigQuery column.
Returns:
None: if the Arrow type cannot be determined.
"""
arrow_type = bq_to_arrow_data_type(bq_field)
- if arrow_type:
+ if arrow_type is not None:
+ if array_type is not None:
+ arrow_type = array_type # For GEOGRAPHY, at least initially
is_nullable = bq_field.mode.upper() == "NULLABLE"
return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable)
@@ -245,7 +278,24 @@ def bq_schema_to_nullsafe_pandas_dtypes(
def bq_to_arrow_array(series, bq_field):
- arrow_type = bq_to_arrow_data_type(bq_field)
+ if bq_field.field_type.upper() == "GEOGRAPHY":
+ arrow_type = None
+ first = _first_valid(series)
+ if first is not None:
+ if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry):
+ arrow_type = pyarrow.binary()
+ # Convert shapely geometry to WKB binary format:
+ series = series.apply(_to_wkb)
+ elif isinstance(first, bytes):
+ arrow_type = pyarrow.binary()
+ elif series.dtype.name == "geometry":
+ # We have a GeoSeries containing all nulls, convert it to a pandas series
+ series = pandas.Series(numpy.array(series))
+
+ if arrow_type is None:
+ arrow_type = bq_to_arrow_data_type(bq_field)
+ else:
+ arrow_type = bq_to_arrow_data_type(bq_field)
field_type_upper = bq_field.field_type.upper() if bq_field.field_type else ""
@@ -299,6 +349,12 @@ def list_columns_and_indexes(dataframe):
return columns_and_indexes
+def _first_valid(series):
+ first_valid_index = series.first_valid_index()
+ if first_valid_index is not None:
+ return series.at[first_valid_index]
+
+
def dataframe_to_bq_schema(dataframe, bq_schema):
"""Convert a pandas DataFrame schema to a BigQuery schema.
@@ -339,6 +395,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
# Otherwise, try to automatically determine the type based on the
# pandas dtype.
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
+ if bq_type is None:
+ sample_data = _first_valid(dataframe[column])
+ if (
+ isinstance(sample_data, _BaseGeometry)
+ and sample_data is not None # Paranoia
+ ):
+ bq_type = "GEOGRAPHY"
bq_field = schema.SchemaField(column, bq_type)
bq_schema_out.append(bq_field)
@@ -463,11 +526,11 @@ def dataframe_to_arrow(dataframe, bq_schema):
arrow_names = []
arrow_fields = []
for bq_field in bq_schema:
- arrow_fields.append(bq_to_arrow_field(bq_field))
arrow_names.append(bq_field.name)
arrow_arrays.append(
bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field)
)
+ arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type))
if all((field is not None for field in arrow_fields)):
return pyarrow.Table.from_arrays(
@@ -791,7 +854,13 @@ def dataframe_to_json_generator(dataframe):
output = {}
for column, value in zip(dataframe.columns, row):
# Omit NaN values.
- if pandas.isna(value):
+ is_nan = pandas.isna(value)
+
+ # isna() can also return an array-like of bools, but the latter's boolean
+ # value is ambiguous, hence an extra check. An array-like value is *not*
+ # considered a NaN, however.
+ if isinstance(is_nan, bool) and is_nan:
continue
output[column] = value
+
yield output
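
The `isinstance(is_nan, bool)` guard above matters because `pandas.isna` is polymorphic: for a scalar it returns a plain bool, but for an array-like cell (e.g. a REPEATED field) it returns an element-wise array whose truth value is ambiguous. A small standalone sketch of the behavior the fix handles:

    import pandas

    print(pandas.isna(float("nan")))  # True (plain bool) -> value is omitted
    print(pandas.isna([1.0, None]))   # [False  True] (ndarray) -> value is kept

    value = [1.0, None]               # e.g. a REPEATED column cell
    is_nan = pandas.isna(value)
    # A bare `if is_nan:` would raise "The truth value of an array with
    # more than one element is ambiguous".
    if isinstance(is_nan, bool) and is_nan:
        print("omit NaN")
    else:
        print("keep array-like value")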
diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index e2863e6a5..a738dd0f3 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -62,17 +62,24 @@
from google.cloud.bigquery.opentelemetry_tracing import create_span
from google.cloud.bigquery import job
from google.cloud.bigquery.job import (
+ CopyJob,
+ CopyJobConfig,
+ ExtractJob,
+ ExtractJobConfig,
+ LoadJob,
LoadJobConfig,
QueryJob,
QueryJobConfig,
- CopyJobConfig,
- ExtractJobConfig,
)
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.model import _model_arg_to_model_ref
from google.cloud.bigquery.query import _QueryResults
-from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY
+from google.cloud.bigquery.retry import (
+ DEFAULT_JOB_RETRY,
+ DEFAULT_RETRY,
+ DEFAULT_TIMEOUT,
+)
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.schema import SchemaField
@@ -107,6 +114,8 @@
# https://github.com/googleapis/python-bigquery/issues/438
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120
+TIMEOUT_HEADER = "X-Server-Timeout"
+
class Project(object):
"""Wrapper for resource describing a BigQuery project.
@@ -228,7 +237,7 @@ def get_service_account_email(
self,
project: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> str:
"""Get the email address of the project's BigQuery service account
@@ -275,7 +284,7 @@ def list_projects(
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
page_size: int = None,
) -> page_iterator.Iterator:
"""List projects for the project associated with this client.
@@ -341,7 +350,7 @@ def list_datasets(
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
page_size: int = None,
) -> page_iterator.Iterator:
"""List datasets for the project associated with this client.
@@ -512,7 +521,7 @@ def create_dataset(
dataset: Union[str, Dataset, DatasetReference],
exists_ok: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Dataset:
"""API call: create the dataset via a POST request.
@@ -587,7 +596,7 @@ def create_routine(
routine: Routine,
exists_ok: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Routine:
"""[Beta] Create a routine via a POST request.
@@ -642,7 +651,7 @@ def create_table(
table: Union[str, Table, TableReference],
exists_ok: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Table:
"""API call: create a table via a PUT request
@@ -698,23 +707,33 @@ def create_table(
return self.get_table(table.reference, retry=retry)
def _call_api(
- self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs
+ self,
+ retry,
+ span_name=None,
+ span_attributes=None,
+ job_ref=None,
+ headers: Optional[Dict[str, str]] = None,
+ **kwargs,
):
+ kwargs = _add_server_timeout_header(headers, kwargs)
call = functools.partial(self._connection.api_request, **kwargs)
+
if retry:
call = retry(call)
+
if span_name is not None:
with create_span(
name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
):
return call()
+
return call()
def get_dataset(
self,
dataset_ref: Union[DatasetReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Dataset:
"""Fetch the dataset referenced by ``dataset_ref``
@@ -758,7 +777,7 @@ def get_iam_policy(
table: Union[Table, TableReference],
requested_policy_version: int = 1,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Policy:
if not isinstance(table, (Table, TableReference)):
raise TypeError("table must be a Table or TableReference")
@@ -788,7 +807,7 @@ def set_iam_policy(
policy: Policy,
updateMask: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Policy:
if not isinstance(table, (Table, TableReference)):
raise TypeError("table must be a Table or TableReference")
@@ -821,7 +840,7 @@ def test_iam_permissions(
table: Union[Table, TableReference],
permissions: Sequence[str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Dict[str, Any]:
if not isinstance(table, (Table, TableReference)):
raise TypeError("table must be a Table or TableReference")
@@ -846,7 +865,7 @@ def get_model(
self,
model_ref: Union[ModelReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Model:
"""[Beta] Fetch the model referenced by ``model_ref``.
@@ -889,7 +908,7 @@ def get_routine(
self,
routine_ref: Union[Routine, RoutineReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Routine:
"""[Beta] Get the routine referenced by ``routine_ref``.
@@ -933,7 +952,7 @@ def get_table(
self,
table: Union[Table, TableReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Table:
"""Fetch the table referenced by ``table``.
@@ -975,7 +994,7 @@ def update_dataset(
dataset: Dataset,
fields: Sequence[str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Dataset:
"""Change some fields of a dataset.
@@ -1045,7 +1064,7 @@ def update_model(
model: Model,
fields: Sequence[str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Model:
"""[Beta] Change some fields of a model.
@@ -1109,7 +1128,7 @@ def update_routine(
routine: Routine,
fields: Sequence[str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Routine:
"""[Beta] Change some fields of a routine.
@@ -1183,7 +1202,7 @@ def update_table(
table: Table,
fields: Sequence[str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Table:
"""Change some fields of a table.
@@ -1249,7 +1268,7 @@ def list_models(
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
page_size: int = None,
) -> page_iterator.Iterator:
"""[Beta] List models in the dataset.
@@ -1326,7 +1345,7 @@ def list_routines(
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
page_size: int = None,
) -> page_iterator.Iterator:
"""[Beta] List routines in the dataset.
@@ -1403,7 +1422,7 @@ def list_tables(
max_results: int = None,
page_token: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
page_size: int = None,
) -> page_iterator.Iterator:
"""List tables in the dataset.
@@ -1478,7 +1497,7 @@ def delete_dataset(
dataset: Union[Dataset, DatasetReference, str],
delete_contents: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
not_found_ok: bool = False,
) -> None:
"""Delete a dataset.
@@ -1537,7 +1556,7 @@ def delete_model(
self,
model: Union[Model, ModelReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
not_found_ok: bool = False,
) -> None:
"""[Beta] Delete a model
@@ -1587,12 +1606,12 @@ def delete_model(
def delete_job_metadata(
self,
- job_id,
- project=None,
- location=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- not_found_ok=False,
+ job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob],
+ project: Optional[str] = None,
+ location: Optional[str] = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ not_found_ok: bool = False,
):
"""[Beta] Delete job metadata from job history.
@@ -1600,26 +1619,20 @@ def delete_job_metadata(
:func:`~google.cloud.bigquery.client.Client.cancel_job` instead.
Args:
- job_id (Union[ \
- str, \
- google.cloud.bigquery.job.LoadJob, \
- google.cloud.bigquery.job.CopyJob, \
- google.cloud.bigquery.job.ExtractJob, \
- google.cloud.bigquery.job.QueryJob \
- ]): Job identifier.
+ job_id: Job or job identifier.
Keyword Arguments:
- project (Optional[str]):
+ project:
ID of the project which owns the job (defaults to the client's project).
- location (Optional[str]):
+ location:
Location where the job was run. Ignored if ``job_id`` is a job
object.
- retry (Optional[google.api_core.retry.Retry]):
+ retry:
How to retry the RPC.
- timeout (Optional[float]):
+ timeout:
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
- not_found_ok (Optional[bool]):
+ not_found_ok:
Defaults to ``False``. If ``True``, ignore "not found" errors
when deleting the job.
"""
@@ -1660,7 +1673,7 @@ def delete_routine(
self,
routine: Union[Routine, RoutineReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
not_found_ok: bool = False,
) -> None:
"""[Beta] Delete a routine.
@@ -1714,7 +1727,7 @@ def delete_table(
self,
table: Union[Table, TableReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
not_found_ok: bool = False,
) -> None:
"""Delete a table
@@ -1767,7 +1780,7 @@ def _get_query_results(
project: str = None,
timeout_ms: int = None,
location: str = None,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> _QueryResults:
"""Get the query results object for a query job.
@@ -1856,7 +1869,7 @@ def create_job(
self,
job_config: dict,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
"""Create a new job.
Args:
@@ -1951,7 +1964,7 @@ def get_job(
project: str = None,
location: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
"""Fetch a job for the project associated with this client.
@@ -2025,7 +2038,7 @@ def cancel_job(
project: str = None,
location: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
"""Attempt to cancel a job from a job ID.
@@ -2102,7 +2115,7 @@ def list_jobs(
all_users: bool = None,
state_filter: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
min_creation_time: datetime.datetime = None,
max_creation_time: datetime.datetime = None,
page_size: int = None,
@@ -2217,7 +2230,7 @@ def load_table_from_uri(
project: str = None,
job_config: LoadJobConfig = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> job.LoadJob:
"""Starts a job for loading data into a table from CloudStorage.
@@ -2301,7 +2314,7 @@ def load_table_from_file(
location: str = None,
project: str = None,
job_config: LoadJobConfig = None,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> job.LoadJob:
"""Upload the contents of this table from a file-like object.
@@ -2404,7 +2417,7 @@ def load_table_from_dataframe(
project: str = None,
job_config: LoadJobConfig = None,
parquet_compression: str = "snappy",
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> job.LoadJob:
"""Upload the contents of a table from a pandas DataFrame.
@@ -2622,7 +2635,7 @@ def load_table_from_json(
location: str = None,
project: str = None,
job_config: LoadJobConfig = None,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> job.LoadJob:
"""Upload the contents of a table from a JSON string or dict.
@@ -2905,7 +2918,7 @@ def copy_table(
project: str = None,
job_config: CopyJobConfig = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> job.CopyJob:
"""Copy one or more tables to another table.
@@ -3008,7 +3021,7 @@ def extract_table(
project: str = None,
job_config: ExtractJobConfig = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
source_type: str = "Table",
) -> job.ExtractJob:
"""Start a job to extract a table into Cloud Storage files.
@@ -3106,7 +3119,7 @@ def query(
location: str = None,
project: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
job_retry: retries.Retry = DEFAULT_JOB_RETRY,
) -> job.QueryJob:
"""Run a SQL query.
@@ -3388,7 +3401,7 @@ def insert_rows_json(
ignore_unknown_values: bool = None,
template_suffix: str = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Sequence[dict]:
"""Insert rows into a table without applying local type conversions.
@@ -3523,7 +3536,7 @@ def list_partitions(
self,
table: Union[Table, TableReference, str],
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> Sequence[str]:
"""List the partitions in a table.
@@ -3573,7 +3586,7 @@ def list_rows(
start_index: int = None,
page_size: int = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> RowIterator:
"""List the rows of the table.
@@ -3685,7 +3698,7 @@ def _list_rows_from_query_results(
start_index: int = None,
page_size: int = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: float = DEFAULT_TIMEOUT,
) -> RowIterator:
"""List the rows of a completed query.
See
@@ -3988,3 +4001,16 @@ def _get_upload_headers(user_agent):
"User-Agent": user_agent,
"content-type": "application/json",
}
+
+
+def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs):
+ timeout = kwargs.get("timeout")
+ if timeout is not None:
+ if headers is None:
+ headers = {}
+ headers[TIMEOUT_HEADER] = str(timeout)
+
+ if headers:
+ kwargs["headers"] = headers
+
+ return kwargs
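
`_call_api` now threads every per-request `timeout` into an `X-Server-Timeout` header via `_add_server_timeout_header`, so the backend can observe the client-side budget. A sketch of the helper's effect on the request kwargs (names taken from the diff above):

    from google.cloud.bigquery.client import TIMEOUT_HEADER, _add_server_timeout_header

    kwargs = _add_server_timeout_header(
        None, {"timeout": 7.5, "path": "/projects/p/datasets"}
    )
    assert kwargs["headers"] == {TIMEOUT_HEADER: "7.5"}  # {"X-Server-Timeout": "7.5"}

    # Without a timeout (and no preexisting headers), nothing is added:
    assert "headers" not in _add_server_timeout_header(None, {"timeout": None})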
diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py
index 9c134b47c..72e711bcf 100644
--- a/google/cloud/bigquery/dbapi/_helpers.py
+++ b/google/cloud/bigquery/dbapi/_helpers.py
@@ -173,7 +173,7 @@ def _parse_type(
\s*
(ARRAY|STRUCT|RECORD) # Type
\s*
- <([A-Z0-9<> ,()]+)> # Subtype(s)
+ <([A-Z0-9_<> ,()]+)> # Subtype(s)
\s*$
""",
re.IGNORECASE | re.VERBOSE,
diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py
index 587598d5f..b1239ff57 100644
--- a/google/cloud/bigquery/dbapi/cursor.py
+++ b/google/cloud/bigquery/dbapi/cursor.py
@@ -494,7 +494,7 @@ def _extract_types(
([^:)]*) # name
(?:: # ':' introduces type
( # start of type group
- [a-zA-Z0-9<>, ]+ # First part, no parens
+ [a-zA-Z0-9_<>, ]+ # First part, no parens
(?: # start sets of parens + non-paren text
\([0-9 ,]+\) # comma-separated groups of digits in parens
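
Both regex tweaks add `_` to the character class for struct subtypes, so DB API parameters typed as STRUCTs with underscored field names now parse. A hedged usage sketch (requires default credentials; query and field names are illustrative):

    from google.cloud import bigquery
    from google.cloud.bigquery import dbapi

    conn = dbapi.connect(bigquery.Client())
    cursor = conn.cursor()

    # Before this fix, the `_` in `user_id` made the type spec unparseable.
    cursor.execute(
        "SELECT %(record:STRUCT<user_id INT64, name STRING>)s",
        {"record": {"user_id": 123, "name": "Ada"}},
    )
    print(cursor.fetchone())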
diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py
index c6ee98944..f0dd3d668 100644
--- a/google/cloud/bigquery/job/copy_.py
+++ b/google/cloud/bigquery/job/copy_.py
@@ -240,7 +240,7 @@ def to_api_repr(self):
def from_api_repr(cls, resource, client):
"""Factory: construct a job given its API representation
- .. note:
+ .. note::
This method assumes that the project found in the resource matches
the client's project.
diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py
index 3373bcdef..52aa036c9 100644
--- a/google/cloud/bigquery/job/extract.py
+++ b/google/cloud/bigquery/job/extract.py
@@ -244,7 +244,7 @@ def to_api_repr(self):
def from_api_repr(cls, resource: dict, client) -> "ExtractJob":
"""Factory: construct a job given its API representation
- .. note:
+ .. note::
This method assumes that the project found in the resource matches
the client's project.
diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py
index aee055c1c..b12c3e621 100644
--- a/google/cloud/bigquery/job/load.py
+++ b/google/cloud/bigquery/job/load.py
@@ -800,7 +800,7 @@ def to_api_repr(self):
def from_api_repr(cls, resource: dict, client) -> "LoadJob":
"""Factory: construct a job given its API representation
- .. note:
+ .. note::
This method assumes that the project found in the resource matches
the client's project.
diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py
index ca3ffb2bf..c07daec99 100644
--- a/google/cloud/bigquery/job/query.py
+++ b/google/cloud/bigquery/job/query.py
@@ -53,6 +53,7 @@
# Assumption: type checks are only used by library developers and CI environments
# that have all optional dependencies installed, thus no conditional imports.
import pandas
+ import geopandas
import pyarrow
from google.api_core import retry as retries
from google.cloud import bigquery_storage
@@ -1482,6 +1483,7 @@ def to_dataframe(
create_bqstorage_client: bool = True,
date_as_object: bool = True,
max_results: Optional[int] = None,
+ geography_as_object: bool = False,
) -> "pandas.DataFrame":
"""Return a pandas DataFrame from a QueryJob
@@ -1533,13 +1535,27 @@ def to_dataframe(
.. versionadded:: 2.21.0
+ geography_as_object (Optional[bool]):
+ If ``True``, convert GEOGRAPHY data to :mod:`shapely`
+ geometry objects. If ``False`` (default), don't cast
+ geography data to :mod:`shapely` geometry objects.
+
+ .. versionadded:: 2.24.0
+
Returns:
- A :class:`~pandas.DataFrame` populated with row data and column
- headers from the query results. The column headers are derived
- from the destination table's schema.
+ pandas.DataFrame:
+ A :class:`~pandas.DataFrame` populated with row data
+ and column headers from the query results. The column
+ headers are derived from the destination table's
+ schema.
Raises:
- ValueError: If the `pandas` library cannot be imported.
+ ValueError:
+ If the :mod:`pandas` library cannot be imported, or
+ the :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported. Also if
+ `geography_as_object` is `True`, but the
+ :mod:`shapely` library cannot be imported.
"""
query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
return query_result.to_dataframe(
@@ -1548,6 +1564,101 @@ def to_dataframe(
progress_bar_type=progress_bar_type,
create_bqstorage_client=create_bqstorage_client,
date_as_object=date_as_object,
+ geography_as_object=geography_as_object,
+ )
+
+ # If changing the signature of this method, make sure to apply the same
+ # changes to table.RowIterator.to_dataframe(), except for the max_results parameter
+ # that should only exist here in the QueryJob method.
+ def to_geodataframe(
+ self,
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ dtypes: Dict[str, Any] = None,
+ progress_bar_type: str = None,
+ create_bqstorage_client: bool = True,
+ date_as_object: bool = True,
+ max_results: Optional[int] = None,
+ geography_column: Optional[str] = None,
+ ) -> "geopandas.GeoDataFrame":
+ """Return a GeoPandas GeoDataFrame from a QueryJob
+
+ Args:
+ bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
+ A BigQuery Storage API client. If supplied, use the faster
+ BigQuery Storage API to fetch rows from BigQuery. This
+ API is a billable API.
+
+ This method requires the ``fastavro`` and
+ ``google-cloud-bigquery-storage`` libraries.
+
+ Reading from a specific partition or snapshot is not
+ currently supported by this method.
+
+ dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
+ A dictionary of column names to pandas ``dtype``s. The provided
+ ``dtype`` is used when constructing the series for the column
+ specified. Otherwise, the default pandas behavior is used.
+
+ progress_bar_type (Optional[str]):
+ If set, use the `tqdm <https://github.com/tqdm/tqdm>`_ library to
+ display a progress bar while the data downloads. Install the
+ ``tqdm`` package to use this feature.
+
+ See
+ :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`
+ for details.
+
+ .. versionadded:: 1.11.0
+ create_bqstorage_client (Optional[bool]):
+ If ``True`` (default), create a BigQuery Storage API client
+ using the default API settings. The BigQuery Storage API
+ is a faster way to fetch rows from BigQuery. See the
+ ``bqstorage_client`` parameter for more information.
+
+ This argument does nothing if ``bqstorage_client`` is supplied.
+
+ .. versionadded:: 1.24.0
+
+ date_as_object (Optional[bool]):
+ If ``True`` (default), cast dates to objects. If ``False``, convert
+ to datetime64[ns] dtype.
+
+ .. versionadded:: 1.26.0
+
+ max_results (Optional[int]):
+ Maximum number of rows to include in the result. No limit by default.
+
+ .. versionadded:: 2.21.0
+
+ geography_column (Optional[str]):
+ If there is more than one GEOGRAPHY column,
+ identifies which one to use to construct a GeoPandas
+ GeoDataFrame. This option can be omitted if there's
+ only one GEOGRAPHY column.
+
+ Returns:
+ geopandas.GeoDataFrame:
+ A :class:`geopandas.GeoDataFrame` populated with row
+ data and column headers from the query results. The
+ column headers are derived from the destination
+ table's schema.
+
+ Raises:
+ ValueError:
+ If the :mod:`geopandas` library cannot be imported, or the
+ :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported.
+
+ .. versionadded:: 2.24.0
+ """
+ query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
+ return query_result.to_geodataframe(
+ bqstorage_client=bqstorage_client,
+ dtypes=dtypes,
+ progress_bar_type=progress_bar_type,
+ create_bqstorage_client=create_bqstorage_client,
+ date_as_object=date_as_object,
+ geography_column=geography_column,
)
def __iter__(self):
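
With `geography_as_object=True`, GEOGRAPHY cells come back as shapely geometries instead of WKT strings. A small sketch (needs `shapely` installed and application default credentials; the query is illustrative):

    from google.cloud import bigquery

    client = bigquery.Client()
    sql = "SELECT ST_GEOGPOINT(-122.084, 37.422) AS location"

    df = client.query(sql).to_dataframe(geography_as_object=True)
    point = df["location"][0]
    print(type(point))       # <class 'shapely.geometry.point.Point'>
    print(point.x, point.y)  # -122.084 37.422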
diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py
index dce911232..60670167e 100644
--- a/google/cloud/bigquery/magics/magics.py
+++ b/google/cloud/bigquery/magics/magics.py
@@ -671,7 +671,9 @@ def _cell_magic(line, query):
_handle_error(ex, args.destination_var)
return
- result = rows.to_dataframe(bqstorage_client=bqstorage_client)
+ result = rows.to_dataframe(
+ bqstorage_client=bqstorage_client, create_bqstorage_client=False,
+ )
if args.destination_var:
IPython.get_ipython().push({args.destination_var: result})
return
@@ -728,11 +730,15 @@ def _cell_magic(line, query):
if max_results:
result = query_job.result(max_results=max_results).to_dataframe(
- bqstorage_client=bqstorage_client, progress_bar_type=progress_bar
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type=progress_bar,
)
else:
result = query_job.to_dataframe(
- bqstorage_client=bqstorage_client, progress_bar_type=progress_bar
+ bqstorage_client=bqstorage_client,
+ create_bqstorage_client=False,
+ progress_bar_type=progress_bar,
)
if args.destination_var:
diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py
index e9286055c..830582322 100644
--- a/google/cloud/bigquery/retry.py
+++ b/google/cloud/bigquery/retry.py
@@ -29,6 +29,7 @@
exceptions.BadGateway,
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ConnectionError,
+ requests.exceptions.Timeout,
auth_exceptions.TransportError,
)
@@ -59,6 +60,13 @@ def _should_retry(exc):
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""
+DEFAULT_TIMEOUT = 5.0 * 60.0
+"""The default API timeout.
+
+This is the time to wait per request. To adjust the total wait time, set a
+deadline on the retry object.
+"""
+
job_retry_reasons = "rateLimitExceeded", "backendError"
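
`DEFAULT_TIMEOUT` bounds each HTTP request, while the retry object's deadline bounds the total wait across retries (and `requests.exceptions.Timeout` is now retried too). A sketch of tuning both independently (the table ID is illustrative):

    from google.cloud import bigquery
    from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_TIMEOUT

    client = bigquery.Client()
    print(DEFAULT_TIMEOUT)  # 300.0 -- seconds allowed per request

    # 30s per HTTP attempt, retrying for up to 10 minutes overall.
    table = client.get_table(
        "bigquery-public-data.usa_names.usa_1910_2013",
        timeout=30.0,
        retry=DEFAULT_RETRY.with_deadline(600.0),
    )
    print(table.num_rows)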
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index 7387f58c1..c44289324 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -31,6 +31,20 @@
import pyarrow
+try:
+ import geopandas
+except ImportError:
+ geopandas = None
+else:
+ _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326"
+
+try:
+ import shapely.geos
+except ImportError:
+ shapely = None
+else:
+ _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read
+
import google.api_core.exceptions
from google.api_core.page_iterator import HTTPIterator
@@ -45,17 +59,25 @@
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
if typing.TYPE_CHECKING: # pragma: NO COVER
- from google.cloud import bigquery_storage
-
# Unconditionally import optional dependencies again to tell pytype that
# they are not None, avoiding false "no attribute" errors.
import pandas
+ import geopandas
+ from google.cloud import bigquery_storage
_NO_PANDAS_ERROR = (
"The pandas library is not installed, please install "
"pandas to use the to_dataframe() function."
)
+_NO_GEOPANDAS_ERROR = (
+ "The geopandas library is not installed, please install "
+ "geopandas to use the to_geodataframe() function."
+)
+_NO_SHAPELY_ERROR = (
+ "The shapely library is not installed, please install "
+ "shapely to use the geography_as_object option."
+)
_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'
@@ -1726,6 +1748,7 @@ def to_arrow(
.. versionadded:: 1.17.0
"""
self._maybe_warn_max_results(bqstorage_client)
+
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None
@@ -1850,6 +1873,7 @@ def to_dataframe(
progress_bar_type: str = None,
create_bqstorage_client: bool = True,
date_as_object: bool = True,
+ geography_as_object: bool = False,
) -> "pandas.DataFrame":
"""Create a pandas DataFrame by loading all pages of a query.
@@ -1904,6 +1928,13 @@ def to_dataframe(
.. versionadded:: 1.26.0
+ geography_as_object (Optional[bool]):
+ If ``True``, convert GEOGRAPHY data to :mod:`shapely`
+ geometry objects. If ``False`` (default), don't cast
+ geography data to :mod:`shapely` geometry objects.
+
+ .. versionadded:: 2.24.0
+
Returns:
pandas.DataFrame:
A :class:`~pandas.DataFrame` populated with row data and column
@@ -1912,11 +1943,18 @@ def to_dataframe(
Raises:
ValueError:
- If the :mod:`pandas` library cannot be imported.
+ If the :mod:`pandas` library cannot be imported, or
+ the :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported. Also if
+ `geography_as_object` is `True`, but the
+ :mod:`shapely` library cannot be imported.
"""
if pandas is None:
raise ValueError(_NO_PANDAS_ERROR)
+ if geography_as_object and shapely is None:
+ raise ValueError(_NO_SHAPELY_ERROR)
+
if dtypes is None:
dtypes = {}
@@ -1966,8 +2004,136 @@ def to_dataframe(
for column in dtypes:
df[column] = pandas.Series(df[column], dtype=dtypes[column])
+ if geography_as_object:
+ for field in self.schema:
+ if field.field_type.upper() == "GEOGRAPHY":
+ df[field.name] = df[field.name].dropna().apply(_read_wkt)
+
return df
+ # If changing the signature of this method, make sure to apply the same
+ # changes to job.QueryJob.to_geodataframe()
+ def to_geodataframe(
+ self,
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ dtypes: Dict[str, Any] = None,
+ progress_bar_type: str = None,
+ create_bqstorage_client: bool = True,
+ date_as_object: bool = True,
+ geography_column: Optional[str] = None,
+ ) -> "geopandas.GeoDataFrame":
+ """Create a GeoPandas GeoDataFrame by loading all pages of a query.
+
+ Args:
+ bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
+ A BigQuery Storage API client. If supplied, use the faster
+ BigQuery Storage API to fetch rows from BigQuery.
+
+ This method requires the ``pyarrow`` and
+ ``google-cloud-bigquery-storage`` libraries.
+
+ This method only exposes a subset of the capabilities of the
+ BigQuery Storage API. For full access to all features
+ (projections, filters, snapshots) use the Storage API directly.
+
+ dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
+ A dictionary of column names to pandas ``dtype``s. The provided
+ ``dtype`` is used when constructing the series for the column
+ specified. Otherwise, the default pandas behavior is used.
+ progress_bar_type (Optional[str]):
+ If set, use the `tqdm <https://github.com/tqdm/tqdm>`_ library to
+ display a progress bar while the data downloads. Install the
+ ``tqdm`` package to use this feature.
+
+ Possible values of ``progress_bar_type`` include:
+
+ ``None``
+ No progress bar.
+ ``'tqdm'``
+ Use the :func:`tqdm.tqdm` function to print a progress bar
+ to :data:`sys.stderr`.
+ ``'tqdm_notebook'``
+ Use the :func:`tqdm.tqdm_notebook` function to display a
+ progress bar as a Jupyter notebook widget.
+ ``'tqdm_gui'``
+ Use the :func:`tqdm.tqdm_gui` function to display a
+ progress bar as a graphical dialog box.
+
+ create_bqstorage_client (Optional[bool]):
+ If ``True`` (default), create a BigQuery Storage API client
+ using the default API settings. The BigQuery Storage API
+ is a faster way to fetch rows from BigQuery. See the
+ ``bqstorage_client`` parameter for more information.
+
+ This argument does nothing if ``bqstorage_client`` is supplied.
+
+ date_as_object (Optional[bool]):
+ If ``True`` (default), cast dates to objects. If ``False``, convert
+ to datetime64[ns] dtype.
+
+ geography_column (Optional[str]):
+ If there is more than one GEOGRAPHY column,
+ identifies which one to use to construct a geopandas
+ GeoDataFrame. This option can be omitted if there's
+ only one GEOGRAPHY column.
+
+ Returns:
+ geopandas.GeoDataFrame:
+ A :class:`geopandas.GeoDataFrame` populated with row
+ data and column headers from the query results. The
+ column headers are derived from the destination
+ table's schema.
+
+ Raises:
+ ValueError:
+ If the :mod:`geopandas` library cannot be imported, or the
+ :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported.
+
+ .. versionadded:: 2.24.0
+ """
+ if geopandas is None:
+ raise ValueError(_NO_GEOPANDAS_ERROR)
+
+ geography_columns = set(
+ field.name
+ for field in self.schema
+ if field.field_type.upper() == "GEOGRAPHY"
+ )
+ if not geography_columns:
+ raise TypeError(
+ "There must be at least one GEOGRAPHY column"
+ " to create a GeoDataFrame"
+ )
+
+ if geography_column:
+ if geography_column not in geography_columns:
+ raise ValueError(
+ f"The given geography column, {geography_column}, doesn't name"
+ f" a GEOGRAPHY column in the result."
+ )
+ elif len(geography_columns) == 1:
+ [geography_column] = geography_columns
+ else:
+ raise ValueError(
+ "There is more than one GEOGRAPHY column in the result. "
+ "The geography_column argument must be used to specify which "
+ "one to use to create a GeoDataFrame"
+ )
+
+ df = self.to_dataframe(
+ bqstorage_client,
+ dtypes,
+ progress_bar_type,
+ create_bqstorage_client,
+ date_as_object,
+ geography_as_object=True,
+ )
+
+ return geopandas.GeoDataFrame(
+ df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column
+ )
+
class _EmptyRowIterator(RowIterator):
"""An empty row iterator.
@@ -2018,6 +2184,7 @@ def to_dataframe(
progress_bar_type=None,
create_bqstorage_client=True,
date_as_object=True,
+ geography_as_object=False,
) -> "pandas.DataFrame":
"""Create an empty dataframe.
@@ -2035,6 +2202,31 @@ def to_dataframe(
raise ValueError(_NO_PANDAS_ERROR)
return pandas.DataFrame()
+ def to_geodataframe(
+ self,
+ bqstorage_client=None,
+ dtypes=None,
+ progress_bar_type=None,
+ create_bqstorage_client=True,
+ date_as_object=True,
+ geography_column: Optional[str] = None,
+ ) -> "pandas.DataFrame":
+ """Create an empty dataframe.
+
+ Args:
+ bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
+ dtypes (Any): Ignored. Added for compatibility with RowIterator.
+ progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
+ create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
+ date_as_object (bool): Ignored. Added for compatibility with RowIterator.
+ geography_column (str): Ignored. Added for compatibility with RowIterator.
+
+ Returns:
+ geopandas.GeoDataFrame: An empty :class:`geopandas.GeoDataFrame`.
+ """
+ if geopandas is None:
+ raise ValueError(_NO_GEOPANDAS_ERROR)
+ return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM)
+
def to_dataframe_iterable(
self,
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
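
When a result set has several GEOGRAPHY columns, `to_geodataframe` needs `geography_column` to pick the active geometry; with exactly one column it is inferred, and with none a `TypeError` is raised. An illustrative sketch (requires `geopandas`; the column names are hypothetical):

    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT
          ST_GEOGPOINT(-122.084, 37.422) AS start_point,
          ST_GEOGPOINT(-122.090, 37.425) AS end_point
    """

    rows = client.query(sql).result()
    # Two GEOGRAPHY columns, so geography_column must be given:
    gdf = rows.to_geodataframe(geography_column="start_point")
    print(gdf.crs)            # WGS 84 (EPSG:4326)
    print(gdf.geometry.name)  # start_point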
diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index 84f6b4643..1f7d79ab9 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = "2.24.0"
+__version__ = "2.26.0"
diff --git a/noxfile.py b/noxfile.py
index 0dfe7bf93..dbf6a163c 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -94,7 +94,7 @@ def unit(session):
default(session)
-@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1])
+@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]])
def unit_noextras(session):
"""Run the unit test suite."""
default(session, install_extras=False)
@@ -160,10 +160,6 @@ def snippets(session):
if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false":
session.skip("RUN_SNIPPETS_TESTS is set to false, skipping")
- # Sanity check: Only run snippets tests if the environment variable is set.
- if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""):
- session.skip("Credentials must be set via environment variable.")
-
constraints_path = str(
CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
)
diff --git a/owlbot.py b/owlbot.py
index 09845480a..09aa8ca6f 100644
--- a/owlbot.py
+++ b/owlbot.py
@@ -63,7 +63,7 @@
s.replace(
library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py",
r"type_ ",
- "type "
+ "type ",
)
s.move(
@@ -78,8 +78,8 @@
"noxfile.py",
"setup.py",
f"scripts/fixup_bigquery_{library.name}_keywords.py",
- f"google/cloud/bigquery/__init__.py",
- f"google/cloud/bigquery/py.typed",
+ "google/cloud/bigquery/__init__.py",
+ "google/cloud/bigquery/py.typed",
# There are no public API endpoints for the generated ModelServiceClient,
# thus there's no point in generating it and its tests.
f"google/cloud/bigquery_{library.name}/services/**",
@@ -97,6 +97,10 @@
samples=True,
microgenerator=True,
split_system_tests=True,
+ intersphinx_dependencies={
+ "pandas": "http://pandas.pydata.org/pandas-docs/dev",
+ "geopandas": "https://geopandas.org/",
+ },
)
# BigQuery has a custom multiprocessing note
@@ -109,7 +113,11 @@
# Include custom SNIPPETS_TESTS job for performance.
# https://github.com/googleapis/python-bigquery/issues/191
".kokoro/presubmit/presubmit.cfg",
- ]
+ # Group all renovate PRs together. If this works well, remove this and
+ # update the shared templates (possibly with configuration option to
+ # py_library.)
+ "renovate.json",
+ ],
)
# ----------------------------------------------------------------------------
@@ -121,14 +129,14 @@
s.replace(
"docs/conf.py",
r'\{"members": True\}',
- '{"members": True, "inherited-members": True}'
+ '{"members": True, "inherited-members": True}',
)
# Tell Sphinx to ignore autogenerated docs files.
s.replace(
"docs/conf.py",
r'"samples/snippets/README\.rst",',
- '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator',
+ '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator',
)
# ----------------------------------------------------------------------------
@@ -136,13 +144,14 @@
# ----------------------------------------------------------------------------
# Add .pytype to .gitignore
-s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype")
+s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype")
# Add pytype config to setup.cfg
s.replace(
"setup.cfg",
r"universal = 1",
- textwrap.dedent(""" \g<0>
+ textwrap.dedent(
+ """ \\g<0>
[pytype]
python_version = 3.8
@@ -156,7 +165,56 @@
# There's some issue with finding some pyi files, thus disabling.
# The issue https://github.com/google/pytype/issues/150 is closed, but the
# error still occurs for some reason.
- pyi-error""")
+ pyi-error"""
+ ),
+)
+
+
+# Remove the replacements below once
+# https://github.com/googleapis/synthtool/pull/1188 is merged
+
+# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files
+s.replace(
+ ".kokoro/*.sh",
+ "repo-automation-bots/tree/master",
+ "repo-automation-bots/tree/main",
+)
+
+# Customize CONTRIBUTING.rst to replace master with main
+s.replace(
+ "CONTRIBUTING.rst",
+ "fetch and merge changes from upstream into master",
+ "fetch and merge changes from upstream into main",
+)
+
+s.replace(
+ "CONTRIBUTING.rst", "git merge upstream/master", "git merge upstream/main",
+)
+
+s.replace(
+ "CONTRIBUTING.rst",
+ """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""",
+ """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""",
+)
+
+s.replace(
+ "CONTRIBUTING.rst", r"remote \(``master``\)", "remote (``main``)",
+)
+
+s.replace(
+ "CONTRIBUTING.rst", "blob/master/CONTRIBUTING.rst", "blob/main/CONTRIBUTING.rst",
+)
+
+s.replace(
+ "CONTRIBUTING.rst", "blob/master/noxfile.py", "blob/main/noxfile.py",
+)
+
+s.replace(
+ "docs/conf.py", "master_doc", "root_doc",
+)
+
+s.replace(
+ "docs/conf.py", "# The master toctree document.", "# The root toctree document.",
)
s.shell.run(["nox", "-s", "blacken"], hide_output=False)
diff --git a/renovate.json b/renovate.json
index c04895563..713c60bb4 100644
--- a/renovate.json
+++ b/renovate.json
@@ -1,6 +1,6 @@
{
"extends": [
- "config:base", ":preserveSemverRanges"
+ "config:base", "group:all", ":preserveSemverRanges"
],
"ignorePaths": [".pre-commit-config.yaml"],
"pip_requirements": {
diff --git a/samples/create_routine.py b/samples/create_routine.py
index 012c7927a..1cb4a80b4 100644
--- a/samples/create_routine.py
+++ b/samples/create_routine.py
@@ -22,7 +22,7 @@ def create_routine(routine_id):
# Construct a BigQuery client object.
client = bigquery.Client()
- # TODO(developer): Choose a fully-qualified ID for the routine.
+ # TODO(developer): Choose a fully qualified ID for the routine.
# routine_id = "my-project.my_dataset.my_routine"
routine = bigquery.Routine(
diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt
index b0cf76724..5d836a5c5 100644
--- a/samples/geography/requirements-test.txt
+++ b/samples/geography/requirements-test.txt
@@ -1,2 +1,2 @@
-pytest==6.2.4
+pytest==6.2.5
mock==4.0.3
diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt
index dfee339d4..8fb578018 100644
--- a/samples/geography/requirements.txt
+++ b/samples/geography/requirements.txt
@@ -1,4 +1,51 @@
+attrs==21.2.0
+cachetools==4.2.2
+certifi==2021.5.30
+cffi==1.14.6
+charset-normalizer==2.0.4
+click==8.0.1
+click-plugins==1.1.1
+cligj==0.7.2
+dataclasses==0.6; python_version < '3.7'
+Fiona==1.8.20
geojson==2.5.0
-google-cloud-bigquery==2.24.0
-google-cloud-bigquery-storage==2.6.3
+geopandas==0.9.0
+google-api-core==2.0.1
+google-auth==2.0.2
+google-cloud-bigquery==2.26.0
+google-cloud-bigquery-storage==2.7.0
+google-cloud-core==2.0.0
+google-crc32c==1.1.2
+google-resumable-media==2.0.2
+googleapis-common-protos==1.53.0
+grpcio==1.39.0
+idna==3.2
+importlib-metadata==4.8.1
+libcst==0.3.20
+munch==2.5.0
+mypy-extensions==0.4.3
+numpy==1.19.5; python_version < "3.7"
+numpy==1.21.2; python_version > "3.6"
+packaging==21.0
+pandas==1.1.5; python_version < '3.7'
+pandas==1.3.2; python_version >= '3.7'
+proto-plus==1.19.0
+protobuf==3.17.3
+pyarrow==5.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.20
+pyparsing==2.4.7
+pyproj==3.0.1; python_version < "3.7"
+pyproj==3.1.0; python_version > "3.6"
+python-dateutil==2.8.2
+pytz==2021.1
+PyYAML==5.4.1
+requests==2.26.0
+rsa==4.7.2
Shapely==1.7.1
+six==1.16.0
+typing-extensions==3.10.0.2
+typing-inspect==0.7.1
+urllib3==1.26.6
+zipp==3.5.0
diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py
new file mode 100644
index 000000000..fa8073fef
--- /dev/null
+++ b/samples/geography/to_geodataframe.py
@@ -0,0 +1,32 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+
+client = bigquery.Client()
+
+
+def get_austin_service_requests_as_geography():
+ # [START bigquery_query_results_geodataframe]
+
+ sql = """
+ SELECT created_date, complaint_description,
+ ST_GEOGPOINT(longitude, latitude) as location
+ FROM bigquery-public-data.austin_311.311_service_requests
+ LIMIT 10
+ """
+
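+    # to_geodataframe() requires the optional geopandas dependency, which can
+    # be installed with `pip install 'google-cloud-bigquery[geopandas]'`.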
+ df = client.query(sql).to_geodataframe()
+ # [END bigquery_query_results_geodataframe]
+ return df
diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py
new file mode 100644
index 000000000..7a2ba6937
--- /dev/null
+++ b/samples/geography/to_geodataframe_test.py
@@ -0,0 +1,25 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from .to_geodataframe import get_austin_service_requests_as_geography
+
+
+def test_get_austin_service_requests_as_geography():
+ geopandas = pytest.importorskip("geopandas")
+ df = get_austin_service_requests_as_geography()
+ assert isinstance(df, geopandas.GeoDataFrame)
+ assert len(list(df)) == 3 # verify the number of columns
+ assert len(df) == 10 # verify the number of rows
diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt
index b8dee50d0..caa48813a 100644
--- a/samples/snippets/requirements-test.txt
+++ b/samples/snippets/requirements-test.txt
@@ -1,3 +1,3 @@
-google-cloud-testutils==1.0.0
-pytest==6.2.4
+google-cloud-testutils==1.1.0
+pytest==6.2.5
mock==4.0.3
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index 264899dff..e096af157 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,12 +1,12 @@
-google-cloud-bigquery==2.24.0
-google-cloud-bigquery-storage==2.6.3
-google-auth-oauthlib==0.4.5
+google-cloud-bigquery==2.26.0
+google-cloud-bigquery-storage==2.7.0
+google-auth-oauthlib==0.4.6
grpcio==1.39.0
ipython==7.16.1; python_version < '3.7'
ipython==7.17.0; python_version >= '3.7'
matplotlib==3.3.4; python_version < '3.7'
matplotlib==3.4.1; python_version >= '3.7'
pandas==1.1.5; python_version < '3.7'
-pandas==1.2.0; python_version >= '3.7'
+pandas==1.3.2; python_version >= '3.7'
pyarrow==5.0.0
pytz==2021.1
diff --git a/samples/tests/test_routine_samples.py b/samples/tests/test_routine_samples.py
index 59ec1fae9..c1b0bb5a7 100644
--- a/samples/tests/test_routine_samples.py
+++ b/samples/tests/test_routine_samples.py
@@ -32,7 +32,6 @@ def test_create_routine_ddl(capsys, random_routine_id, client):
out, err = capsys.readouterr()
assert "Created routine {}".format(random_routine_id) in out
- return routine
assert routine.type_ == "SCALAR_FUNCTION"
assert routine.language == "SQL"
expected_arguments = [
diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py
index 6e844cc90..a5c2670e7 100644
--- a/samples/update_dataset_access.py
+++ b/samples/update_dataset_access.py
@@ -28,8 +28,8 @@ def update_dataset_access(dataset_id):
entry = bigquery.AccessEntry(
role="READER",
- entity_type="userByEmail",
- entity_id="sample.bigquery.dev@gmail.com",
+ entity_type="groupByEmail",
+ entity_id="cloud-developer-relations@google.com",
)
entries = list(dataset.access_entries)
diff --git a/setup.py b/setup.py
index 6fa619d37..f1464e77a 100644
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,7 @@
# See: https://github.com/googleapis/python-bigquery/issues/757
"bqstorage": [],
"pandas": ["pandas>=1.0.0"],
+ "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"],
"tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
"opentelemetry": [
"opentelemetry-api >= 0.11b0",
diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt
index bf1f89f58..6e27172b2 100644
--- a/testing/constraints-3.6.txt
+++ b/testing/constraints-3.6.txt
@@ -5,6 +5,7 @@
#
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
# Then this file should have foo==1.14.0
+geopandas==0.9.0
google-api-core==1.29.0
google-cloud-bigquery-storage==2.0.0
google-cloud-core==1.4.1
@@ -18,5 +19,6 @@ proto-plus==1.10.0
protobuf==3.12.0
pyarrow==3.0.0
requests==2.18.0
+Shapely==1.6.0
six==1.13.0
tqdm==4.7.4
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index c6896da14..4b9868f10 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -2351,9 +2351,6 @@ def test_create_table_rows_fetch_nested_schema(self):
self.assertEqual(found[7], e_favtime)
self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"]))
- def _fetch_dataframe(self, query):
- return Config.CLIENT.query(query).result().to_dataframe()
-
def test_nested_table_to_arrow(self):
from google.cloud.bigquery.job import SourceFormat
from google.cloud.bigquery.job import WriteDisposition
diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py
index 2bd496e83..075d3b680 100644
--- a/tests/system/test_pandas.py
+++ b/tests/system/test_pandas.py
@@ -278,8 +278,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id):
def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id):
# Schema with all scalar types.
- # TODO: Uploading DATETIME columns currently fails, thus that field type
- # is temporarily removed from the test.
# See:
# https://github.com/googleapis/python-bigquery/issues/61
# https://issuetracker.google.com/issues/151765076
@@ -287,7 +285,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
bigquery.SchemaField("bool_col", "BOOLEAN"),
bigquery.SchemaField("bytes_col", "BYTES"),
bigquery.SchemaField("date_col", "DATE"),
- # bigquery.SchemaField("dt_col", "DATETIME"),
+ bigquery.SchemaField("dt_col", "DATETIME"),
bigquery.SchemaField("float_col", "FLOAT"),
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
bigquery.SchemaField("int_col", "INTEGER"),
@@ -312,14 +310,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
("bool_col", [True, None, False]),
("bytes_col", [b"abc", None, b"def"]),
("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]),
- # (
- # "dt_col",
- # [
- # datetime.datetime(1, 1, 1, 0, 0, 0),
- # None,
- # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
- # ],
- # ),
+ (
+ "dt_col",
+ [
+ datetime.datetime(1, 1, 1, 0, 0, 0),
+ None,
+ datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+ ],
+ ),
("float_col", [float("-inf"), float("nan"), float("inf")]),
(
"geo_col",
@@ -899,3 +897,190 @@ def test_list_rows_nullable_scalars_extreme_dtypes(
# pandas uses Python string and bytes objects.
assert df.dtypes["bytes_col"].name == "object"
assert df.dtypes["string_col"].name == "object"
+
+
+def test_upload_time_and_datetime_56(bigquery_client, dataset_id):
+ df = pandas.DataFrame(
+ dict(
+ dt=[
+ datetime.datetime(2020, 1, 8, 8, 0, 0),
+ datetime.datetime(
+ 2020,
+ 1,
+ 8,
+ 8,
+ 0,
+ 0,
+ tzinfo=datetime.timezone(datetime.timedelta(hours=-7)),
+ ),
+ ],
+ t=[datetime.time(0, 0, 10, 100001), None],
+ )
+ )
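+    # Without an explicit schema, the datetime column is loaded as TIMESTAMP,
+    # so values come back timezone-aware in UTC.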
+ table = f"{dataset_id}.test_upload_time_and_datetime"
+ bigquery_client.load_table_from_dataframe(df, table).result()
+ data = list(map(list, bigquery_client.list_rows(table)))
+ assert data == [
+ [
+ datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc),
+ datetime.time(0, 0, 10, 100001),
+ ],
+ [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None],
+ ]
+
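+    # Load the same frame with an explicit DATETIME/TIME schema; the
+    # timezone-aware value is converted to UTC and stored without a zone.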
+ from google.cloud.bigquery import job, schema
+
+ table = f"{dataset_id}.test_upload_time_and_datetime_dt"
+ config = job.LoadJobConfig(
+ schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")]
+ )
+
+ bigquery_client.load_table_from_dataframe(df, table, job_config=config).result()
+ data = list(map(list, bigquery_client.list_rows(table)))
+ assert data == [
+ [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)],
+ [datetime.datetime(2020, 1, 8, 15, 0), None],
+ ]
+
+
+def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id):
+ wkt = pytest.importorskip("shapely.wkt")
+ bigquery_client.query(
+ f"create table {dataset_id}.lake (name string, geog geography)"
+ ).result()
+ bigquery_client.query(
+ f"""
+ insert into {dataset_id}.lake (name, geog) values
+ ('foo', st_geogfromtext('point(0 0)')),
+ ('bar', st_geogfromtext('point(0 1)')),
+ ('baz', null)
+ """
+ ).result()
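+    # geography_as_object=True parses GEOGRAPHY values into shapely objects;
+    # NULL values come back as NaN.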
+ df = bigquery_client.query(
+ f"select * from {dataset_id}.lake order by name"
+ ).to_dataframe(geography_as_object=True)
+ assert list(df["name"]) == ["bar", "baz", "foo"]
+ assert df["geog"][0] == wkt.loads("point(0 1)")
+ assert pandas.isna(df["geog"][1])
+ assert df["geog"][2] == wkt.loads("point(0 0)")
+
+
+def test_to_geodataframe(bigquery_client, dataset_id):
+ geopandas = pytest.importorskip("geopandas")
+ from shapely import wkt
+
+ bigquery_client.query(
+ f"create table {dataset_id}.geolake (name string, geog geography)"
+ ).result()
+ bigquery_client.query(
+ f"""
+ insert into {dataset_id}.geolake (name, geog) values
+ ('foo', st_geogfromtext('point(0 0)')),
+ ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')),
+ ('baz', null)
+ """
+ ).result()
+ df = bigquery_client.query(
+ f"select * from {dataset_id}.geolake order by name"
+ ).to_geodataframe()
+ assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))")
+ assert pandas.isna(df["geog"][1])
+ assert df["geog"][2] == wkt.loads("point(0 0)")
+ assert isinstance(df, geopandas.GeoDataFrame)
+ assert isinstance(df["geog"], geopandas.GeoSeries)
+ assert df.area[0] == 0.5
+ assert pandas.isna(df.area[1])
+ assert df.area[2] == 0.0
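+    # BigQuery GEOGRAPHY data uses the WGS 84 coordinate reference system.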
+ assert df.crs.srs == "EPSG:4326"
+ assert df.crs.name == "WGS 84"
+ assert df.geog.crs.srs == "EPSG:4326"
+ assert df.geog.crs.name == "WGS 84"
+
+
+def test_load_geodataframe(bigquery_client, dataset_id):
+ geopandas = pytest.importorskip("geopandas")
+ import pandas
+ from shapely import wkt
+ from google.cloud.bigquery.schema import SchemaField
+
+ df = geopandas.GeoDataFrame(
+ pandas.DataFrame(
+ dict(
+ name=["foo", "bar"],
+ geo1=[None, None],
+ geo2=[None, wkt.loads("Point(1 1)")],
+ )
+ ),
+ geometry="geo1",
+ )
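+    # Both the active geometry column (geo1) and the plain column of shapely
+    # objects (geo2) should be loaded as GEOGRAPHY.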
+
+ table_id = f"{dataset_id}.lake_from_gp"
+ bigquery_client.load_table_from_dataframe(df, table_id).result()
+
+ table = bigquery_client.get_table(table_id)
+ assert table.schema == [
+ SchemaField("name", "STRING", "NULLABLE"),
+ SchemaField("geo1", "GEOGRAPHY", "NULLABLE"),
+ SchemaField("geo2", "GEOGRAPHY", "NULLABLE"),
+ ]
+ assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+ ["bar", None, "POINT(1 1)"],
+ ["foo", None, None],
+ ]
+
+
+def test_load_dataframe_w_shapely(bigquery_client, dataset_id):
+ wkt = pytest.importorskip("shapely.wkt")
+ from google.cloud.bigquery.schema import SchemaField
+
+ df = pandas.DataFrame(
+ dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")])
+ )
+
+ table_id = f"{dataset_id}.lake_from_shapes"
+ bigquery_client.load_table_from_dataframe(df, table_id).result()
+
+ table = bigquery_client.get_table(table_id)
+ assert table.schema == [
+ SchemaField("name", "STRING", "NULLABLE"),
+ SchemaField("geo", "GEOGRAPHY", "NULLABLE"),
+ ]
+ assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+ ["bar", "POINT(1 1)"],
+ ["foo", None],
+ ]
+
+ bigquery_client.load_table_from_dataframe(df, table_id).result()
+ assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+ ["bar", "POINT(1 1)"],
+ ["bar", "POINT(1 1)"],
+ ["foo", None],
+ ["foo", None],
+ ]
+
+
+def test_load_dataframe_w_wkb(bigquery_client, dataset_id):
+ wkt = pytest.importorskip("shapely.wkt")
+ from shapely import wkb
+ from google.cloud.bigquery.schema import SchemaField
+
+ df = pandas.DataFrame(
+ dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))])
+ )
+
+ table_id = f"{dataset_id}.lake_from_wkb"
+    # Create the table first so that BigQuery knows to interpret the WKB
+    # bytes in the geo column as GEOGRAPHY data.
+ bigquery_client.query(
+ f"create table {table_id} (name string, geo GEOGRAPHY)"
+ ).result()
+ bigquery_client.load_table_from_dataframe(df, table_id).result()
+
+ table = bigquery_client.get_table(table_id)
+ assert table.schema == [
+ SchemaField("name", "STRING", "NULLABLE"),
+ SchemaField("geo", "GEOGRAPHY", "NULLABLE"),
+ ]
+ assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+ ["bar", "POINT(1 1)"],
+ ["foo", None],
+ ]
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index 7a67ea6b5..feba65aa5 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import mock
import pytest
from .helpers import make_client
@@ -35,3 +36,21 @@ def DS_ID():
@pytest.fixture
def LOCATION():
yield "us-central"
+
+
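+# Stand-in for client._add_server_timeout_header that passes headers through
+# without setting the X-Server-Timeout header.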
+def noop_add_server_timeout_header(headers, kwargs):
+ if headers:
+ kwargs["headers"] = headers
+ return kwargs
+
+
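+# Disable the server timeout header by default so unit tests don't have to
+# account for it in request assertions; tests marked
+# `enable_add_server_timeout_header` keep the real behavior.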
+@pytest.fixture(autouse=True)
+def disable_add_server_timeout_header(request):
+ if "enable_add_server_timeout_header" in request.keywords:
+ yield
+ else:
+ with mock.patch(
+ "google.cloud.bigquery.client._add_server_timeout_header",
+ noop_add_server_timeout_header,
+ ):
+ yield
diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py
index c792214e7..3642c7229 100644
--- a/tests/unit/job/helpers.py
+++ b/tests/unit/job/helpers.py
@@ -14,36 +14,20 @@
import unittest
-import mock
from google.api_core import exceptions
-
-def _make_credentials():
- import google.auth.credentials
-
- return mock.Mock(spec=google.auth.credentials.Credentials)
+from ..helpers import make_connection, make_client as __make_client
def _make_client(project="test-project", connection=None):
- from google.cloud.bigquery.client import Client
-
+ client = __make_client(project)
if connection is None:
- connection = _make_connection()
+ connection = make_connection()
- client = Client(project=project, credentials=_make_credentials(), _http=object())
client._connection = connection
return client
-def _make_connection(*responses):
- import google.cloud.bigquery._http
- from google.cloud.exceptions import NotFound
-
- mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection)
- mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")]
- return mock_conn
-
-
def _make_retriable_exception():
return exceptions.TooManyRequests(
"retriable exception", errors=[{"reason": "rateLimitExceeded"}]
diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py
index c3f7854e3..aa8e9c045 100644
--- a/tests/unit/job/test_base.py
+++ b/tests/unit/job/test_base.py
@@ -21,8 +21,9 @@
import mock
import pytest
+from ..helpers import make_connection
+
from .helpers import _make_client
-from .helpers import _make_connection
from .helpers import _make_retriable_exception
from .helpers import _make_job_resource
@@ -740,7 +741,7 @@ def test_cancel_defaults(self):
response = {"job": resource}
job = self._set_properties_job()
job._properties["jobReference"]["location"] = self.LOCATION
- connection = job._client._connection = _make_connection(response)
+ connection = job._client._connection = make_connection(response)
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
@@ -769,7 +770,7 @@ def test_cancel_explicit(self):
response = {"job": resource}
job = self._set_properties_job()
client = _make_client(project=other_project)
- connection = client._connection = _make_connection(response)
+ connection = client._connection = make_connection(response)
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
@@ -930,7 +931,7 @@ def test_result_default_wo_state(self):
started=True,
ended=True,
)
- conn = _make_connection(
+ conn = make_connection(
_make_retriable_exception(),
begun_job_resource,
_make_retriable_exception(),
@@ -968,7 +969,7 @@ def test_result_w_retry_wo_state(self):
started=True,
ended=True,
)
- conn = _make_connection(
+ conn = make_connection(
exceptions.NotFound("not normally retriable"),
begun_job_resource,
exceptions.NotFound("not normally retriable"),
@@ -1008,7 +1009,7 @@ def test_result_w_retry_wo_state(self):
)
def test_result_explicit_w_state(self):
- conn = _make_connection()
+ conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, client)
# Use _set_properties() instead of directly modifying _properties so
diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py
index 992efcf6b..d94e5bc88 100644
--- a/tests/unit/job/test_copy.py
+++ b/tests/unit/job/test_copy.py
@@ -14,9 +14,10 @@
import mock
+from ..helpers import make_connection
+
from .helpers import _Base
from .helpers import _make_client
-from .helpers import _make_connection
class TestCopyJobConfig(_Base):
@@ -333,7 +334,7 @@ def test_begin_w_bound_client(self):
del RESOURCE["etag"]
del RESOURCE["selfLink"]
del RESOURCE["user_email"]
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
source = self._table_ref(self.SOURCE_TABLE)
destination = self._table_ref(self.DESTINATION_TABLE)
@@ -396,9 +397,9 @@ def test_begin_w_alternate_client(self):
"writeDisposition": WriteDisposition.WRITE_TRUNCATE,
}
RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
source = self._table_ref(self.SOURCE_TABLE)
destination = self._table_ref(self.DESTINATION_TABLE)
@@ -427,7 +428,7 @@ def test_begin_w_alternate_client(self):
def test_exists_miss_w_bound_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn = _make_connection()
+ conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
source = self._table_ref(self.SOURCE_TABLE)
@@ -446,9 +447,9 @@ def test_exists_miss_w_bound_client(self):
def test_exists_hit_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection({})
+ conn2 = make_connection({})
client2 = _make_client(project=self.PROJECT, connection=conn2)
source = self._table_ref(self.SOURCE_TABLE)
destination = self._table_ref(self.DESTINATION_TABLE)
@@ -468,7 +469,7 @@ def test_exists_hit_w_alternate_client(self):
def test_reload_w_bound_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
source = self._table_ref(self.SOURCE_TABLE)
destination = self._table_ref(self.DESTINATION_TABLE)
@@ -488,9 +489,9 @@ def test_reload_w_bound_client(self):
def test_reload_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
source = self._table_ref(self.SOURCE_TABLE)
destination = self._table_ref(self.DESTINATION_TABLE)
diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py
index 4c9411d0d..8bada51af 100644
--- a/tests/unit/job/test_extract.py
+++ b/tests/unit/job/test_extract.py
@@ -14,9 +14,10 @@
import mock
+from ..helpers import make_connection
+
from .helpers import _Base
from .helpers import _make_client
-from .helpers import _make_connection
class TestExtractJobConfig(_Base):
@@ -265,7 +266,7 @@ def test_begin_w_bound_client(self):
del RESOURCE["etag"]
del RESOURCE["selfLink"]
del RESOURCE["user_email"]
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
source = source_dataset.table(self.SOURCE_TABLE)
@@ -318,9 +319,9 @@ def test_begin_w_alternate_client(self):
"printHeader": False,
}
RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
source = source_dataset.table(self.SOURCE_TABLE)
@@ -353,7 +354,7 @@ def test_begin_w_alternate_client(self):
def test_exists_miss_w_bound_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn = _make_connection()
+ conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(
self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client
@@ -371,9 +372,9 @@ def test_exists_miss_w_bound_client(self):
def test_exists_hit_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection({})
+ conn2 = make_connection({})
client2 = _make_client(project=self.PROJECT, connection=conn2)
job = self._make_one(
self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1
@@ -395,7 +396,7 @@ def test_reload_w_bound_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
source = source_dataset.table(self.SOURCE_TABLE)
@@ -416,9 +417,9 @@ def test_reload_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
source_dataset = DatasetReference(self.PROJECT, self.DS_ID)
source = source_dataset.table(self.SOURCE_TABLE)
diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py
index 70e7860a7..cf2096b8b 100644
--- a/tests/unit/job/test_load.py
+++ b/tests/unit/job/test_load.py
@@ -16,9 +16,10 @@
import mock
+from ..helpers import make_connection
+
from .helpers import _Base
from .helpers import _make_client
-from .helpers import _make_connection
class TestLoadJob(_Base):
@@ -238,7 +239,7 @@ def test_result_invokes_begin(self):
begun_resource = self._make_resource()
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(begun_resource, done_resource)
+ connection = make_connection(begun_resource, done_resource)
client = _make_client(self.PROJECT)
client._connection = connection
@@ -421,7 +422,7 @@ def test_from_api_repr_w_properties(self):
self._verifyResourceProperties(job, RESOURCE)
def test_begin_w_already_running(self):
- conn = _make_connection()
+ conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
job._properties["status"] = {"state": "RUNNING"}
@@ -436,7 +437,7 @@ def test_begin_w_bound_client(self):
del RESOURCE["etag"]
del RESOURCE["selfLink"]
del RESOURCE["user_email"]
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
path = "/projects/{}/jobs".format(self.PROJECT)
@@ -478,7 +479,7 @@ def test_begin_w_autodetect(self):
del resource["etag"]
del resource["selfLink"]
del resource["user_email"]
- conn = _make_connection(resource)
+ conn = make_connection(resource)
client = _make_client(project=self.PROJECT, connection=conn)
config = LoadJobConfig()
config.autodetect = True
@@ -559,9 +560,9 @@ def test_begin_w_alternate_client(self):
"schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
}
RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
age = SchemaField("age", "INTEGER", mode="REQUIRED")
@@ -611,7 +612,7 @@ def test_begin_w_job_reference(self):
resource["jobReference"]["projectId"] = "alternative-project"
resource["jobReference"]["location"] = "US"
job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US")
- conn = _make_connection(resource)
+ conn = make_connection(resource)
client = _make_client(project=self.PROJECT, connection=conn)
load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client)
with mock.patch(
@@ -634,7 +635,7 @@ def test_begin_w_job_reference(self):
def test_exists_miss_w_bound_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn = _make_connection()
+ conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
with mock.patch(
@@ -654,9 +655,9 @@ def test_exists_miss_w_bound_client(self):
def test_exists_hit_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection({})
+ conn2 = make_connection({})
client2 = _make_client(project=self.PROJECT, connection=conn2)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1)
with mock.patch(
@@ -679,7 +680,7 @@ def test_exists_miss_w_job_reference(self):
from google.cloud.bigquery import job
job_ref = job._JobReference("my-job-id", "other-project", "US")
- conn = _make_connection()
+ conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client)
with mock.patch(
@@ -701,7 +702,7 @@ def test_exists_miss_w_job_reference(self):
def test_reload_w_bound_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
with mock.patch(
@@ -719,9 +720,9 @@ def test_reload_w_bound_client(self):
def test_reload_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1)
with mock.patch(
@@ -744,7 +745,7 @@ def test_reload_w_job_reference(self):
resource["jobReference"]["projectId"] = "alternative-project"
resource["jobReference"]["location"] = "US"
job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US")
- conn = _make_connection(resource)
+ conn = make_connection(resource)
client = _make_client(project=self.PROJECT, connection=conn)
load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client)
with mock.patch(
@@ -769,7 +770,7 @@ def test_cancel_w_bound_client(self):
PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource(ended=True)
RESPONSE = {"job": RESOURCE}
- conn = _make_connection(RESPONSE)
+ conn = make_connection(RESPONSE)
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
with mock.patch(
@@ -788,9 +789,9 @@ def test_cancel_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource(ended=True)
RESPONSE = {"job": RESOURCE}
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESPONSE)
+ conn2 = make_connection(RESPONSE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1)
with mock.patch(
@@ -813,7 +814,7 @@ def test_cancel_w_job_reference(self):
resource["jobReference"]["projectId"] = "alternative-project"
resource["jobReference"]["location"] = "US"
job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US")
- conn = _make_connection({"job": resource})
+ conn = make_connection({"job": resource})
client = _make_client(project=self.PROJECT, connection=conn)
load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client)
with mock.patch(
diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py
index d41370520..4c598d797 100644
--- a/tests/unit/job/test_query.py
+++ b/tests/unit/job/test_query.py
@@ -26,9 +26,11 @@
from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS
import google.cloud.bigquery.query
+
+from ..helpers import make_connection
+
from .helpers import _Base
from .helpers import _make_client
-from .helpers import _make_connection
class TestQueryJob(_Base):
@@ -943,7 +945,7 @@ def test_result(self):
"pageToken": None,
"rows": [{"f": [{"v": "abc"}]}],
}
- conn = _make_connection(
+ conn = make_connection(
query_resource, query_resource_done, job_resource_done, query_page_resource
)
client = _make_client(self.PROJECT, connection=conn)
@@ -1005,7 +1007,7 @@ def test_result_with_done_job_calls_get_query_results(self):
"pageToken": None,
"rows": [{"f": [{"v": "abc"}]}],
}
- conn = _make_connection(query_resource_done, results_page_resource)
+ conn = make_connection(query_resource_done, results_page_resource)
client = _make_client(self.PROJECT, connection=conn)
job = self._get_target_class().from_api_repr(job_resource, client)
@@ -1052,7 +1054,7 @@ def test_result_with_max_results(self):
{"f": [{"v": "ghi"}]},
],
}
- connection = _make_connection(query_resource, query_page_resource)
+ connection = make_connection(query_resource, query_page_resource)
client = _make_client(self.PROJECT, connection=connection)
resource = self._make_resource(ended=True)
job = self._get_target_class().from_api_repr(resource, client)
@@ -1096,7 +1098,7 @@ def test_result_w_retry(self):
"tableId": "dest_table",
}
- connection = _make_connection(
+ connection = make_connection(
exceptions.NotFound("not normally retriable"),
query_resource,
exceptions.NotFound("not normally retriable"),
@@ -1144,7 +1146,7 @@ def test_result_w_empty_schema(self):
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
"schema": {"fields": []},
}
- connection = _make_connection(query_resource, query_resource)
+ connection = make_connection(query_resource, query_resource)
client = _make_client(self.PROJECT, connection=connection)
resource = self._make_resource(ended=True)
job = self._get_target_class().from_api_repr(resource, client)
@@ -1165,7 +1167,7 @@ def test_result_invokes_begins(self):
query_resource["jobComplete"] = True
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(
+ connection = make_connection(
begun_resource,
incomplete_resource,
query_resource,
@@ -1196,7 +1198,7 @@ def test_result_w_timeout(self):
}
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(begun_resource, query_resource, done_resource)
+ connection = make_connection(begun_resource, query_resource, done_resource)
client = _make_client(project=self.PROJECT, connection=connection)
job = self._make_one(self.JOB_ID, self.QUERY, client)
@@ -1245,7 +1247,7 @@ def test_result_w_page_size(self):
],
}
query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]}
- conn = _make_connection(
+ conn = make_connection(
query_results_resource, query_page_resource, query_page_resource_2
)
client = _make_client(self.PROJECT, connection=conn)
@@ -1303,7 +1305,7 @@ def test_result_with_start_index(self):
{"f": [{"v": "jkl"}]},
],
}
- connection = _make_connection(query_resource, tabledata_resource)
+ connection = make_connection(query_resource, tabledata_resource)
client = _make_client(self.PROJECT, connection=connection)
resource = self._make_resource(ended=True)
job = self._get_target_class().from_api_repr(resource, client)
@@ -1428,7 +1430,7 @@ def test__begin_w_timeout(self):
PATH = "/projects/%s/jobs" % (self.PROJECT,)
RESOURCE = self._make_resource()
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, self.QUERY, client)
with mock.patch(
@@ -1462,7 +1464,7 @@ def test_begin_w_bound_client(self):
del RESOURCE["etag"]
del RESOURCE["selfLink"]
del RESOURCE["user_email"]
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
config = QueryJobConfig()
@@ -1530,9 +1532,9 @@ def test_begin_w_alternate_client(self):
}
RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION
RESOURCE["configuration"]["dryRun"] = True
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
dataset_ref = DatasetReference(self.PROJECT, DS_ID)
table_ref = dataset_ref.table(TABLE)
@@ -1588,7 +1590,7 @@ def test_begin_w_udf(self):
{"resourceUri": RESOURCE_URI},
{"inlineCode": INLINE_UDF_CODE},
]
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
udf_resources = [
UDFResource("resourceUri", RESOURCE_URI),
@@ -1647,7 +1649,7 @@ def test_begin_w_named_query_parameter(self):
"parameterValue": {"value": "123"},
}
]
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
jconfig = QueryJobConfig()
jconfig.query_parameters = query_parameters
@@ -1695,7 +1697,7 @@ def test_begin_w_positional_query_parameter(self):
config["queryParameters"] = [
{"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}}
]
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
jconfig = QueryJobConfig()
jconfig.query_parameters = query_parameters
@@ -1774,7 +1776,7 @@ def test_begin_w_table_defs(self):
csv_table: CSV_CONFIG_RESOURCE,
}
want_resource = copy.deepcopy(RESOURCE)
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
config = QueryJobConfig()
config.table_definitions = {bt_table: bt_config, csv_table: csv_config}
@@ -1818,7 +1820,7 @@ def test_dry_run_query(self):
del RESOURCE["selfLink"]
del RESOURCE["user_email"]
RESOURCE["configuration"]["dryRun"] = True
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
config = QueryJobConfig()
config.dry_run = True
@@ -1846,7 +1848,7 @@ def test_dry_run_query(self):
def test_exists_miss_w_bound_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn = _make_connection()
+ conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, self.QUERY, client)
with mock.patch(
@@ -1862,9 +1864,9 @@ def test_exists_miss_w_bound_client(self):
def test_exists_hit_w_alternate_client(self):
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection({})
+ conn2 = make_connection({})
client2 = _make_client(project=self.PROJECT, connection=conn2)
job = self._make_one(self.JOB_ID, self.QUERY, client1)
with mock.patch(
@@ -1887,7 +1889,7 @@ def test_reload_w_bound_client(self):
DS_ID = "DATASET"
DEST_TABLE = "dest_table"
RESOURCE = self._make_resource()
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
dataset_ref = DatasetReference(self.PROJECT, DS_ID)
table_ref = dataset_ref.table(DEST_TABLE)
@@ -1919,9 +1921,9 @@ def test_reload_w_alternate_client(self):
"datasetId": DS_ID,
"tableId": DEST_TABLE,
}
- conn1 = _make_connection()
+ conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
- conn2 = _make_connection(RESOURCE)
+ conn2 = make_connection(RESOURCE)
client2 = _make_client(project=self.PROJECT, connection=conn2)
job = self._make_one(self.JOB_ID, self.QUERY, client1)
with mock.patch(
@@ -1945,7 +1947,7 @@ def test_reload_w_timeout(self):
DS_ID = "DATASET"
DEST_TABLE = "dest_table"
RESOURCE = self._make_resource()
- conn = _make_connection(RESOURCE)
+ conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
dataset_ref = DatasetReference(self.PROJECT, DS_ID)
table_ref = dataset_ref.table(DEST_TABLE)
@@ -1975,7 +1977,7 @@ def test_iter(self):
}
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(begun_resource, query_resource, done_resource)
+ connection = make_connection(begun_resource, query_resource, done_resource)
client = _make_client(project=self.PROJECT, connection=connection)
job = self._make_one(self.JOB_ID, self.QUERY, client)
diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py
index c3a9d2d1a..8e4fba770 100644
--- a/tests/unit/job/test_query_pandas.py
+++ b/tests/unit/job/test_query_pandas.py
@@ -22,13 +22,26 @@
from google.cloud import bigquery_storage
+try:
+ import pandas
+except (ImportError, AttributeError): # pragma: NO COVER
+ pandas = None
+try:
+ import shapely
+except (ImportError, AttributeError): # pragma: NO COVER
+ shapely = None
+try:
+ import geopandas
+except (ImportError, AttributeError): # pragma: NO COVER
+ geopandas = None
try:
from tqdm import tqdm
except (ImportError, AttributeError): # pragma: NO COVER
tqdm = None
+from ..helpers import make_connection
+
from .helpers import _make_client
-from .helpers import _make_connection
from .helpers import _make_job_resource
pandas = pytest.importorskip("pandas")
@@ -106,7 +119,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
},
"totalRows": "4",
}
- connection = _make_connection(get_query_results_resource, job_resource)
+ connection = make_connection(get_query_results_resource, job_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(job_resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
@@ -187,7 +200,7 @@ def test_to_arrow():
}
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(
+ connection = make_connection(
begun_resource, query_resource, done_resource, tabledata_resource
)
client = _make_client(connection=connection)
@@ -231,7 +244,7 @@ def test_to_arrow_max_results_no_progress_bar():
from google.cloud.bigquery.job import QueryJob as target_class
from google.cloud.bigquery.schema import SchemaField
- connection = _make_connection({})
+ connection = make_connection({})
client = _make_client(connection=connection)
begun_resource = _make_job_resource(job_type="query")
job = target_class.from_api_repr(begun_resource, client)
@@ -277,7 +290,7 @@ def test_to_arrow_w_tqdm_w_query_plan():
SchemaField("name", "STRING", mode="REQUIRED"),
SchemaField("age", "INTEGER", mode="REQUIRED"),
]
- connection = _make_connection({})
+ connection = make_connection({})
client = _make_client(connection=connection)
job = target_class.from_api_repr(begun_resource, client)
@@ -333,7 +346,7 @@ def test_to_arrow_w_tqdm_w_pending_status():
SchemaField("name", "STRING", mode="REQUIRED"),
SchemaField("age", "INTEGER", mode="REQUIRED"),
]
- connection = _make_connection({})
+ connection = make_connection({})
client = _make_client(connection=connection)
job = target_class.from_api_repr(begun_resource, client)
@@ -384,7 +397,7 @@ def test_to_arrow_w_tqdm_wo_query_plan():
SchemaField("name", "STRING", mode="REQUIRED"),
SchemaField("age", "INTEGER", mode="REQUIRED"),
]
- connection = _make_connection({})
+ connection = make_connection({})
client = _make_client(connection=connection)
job = target_class.from_api_repr(begun_resource, client)
@@ -409,37 +422,41 @@ def test_to_arrow_w_tqdm_wo_query_plan():
result_patch_tqdm.assert_called()
-def test_to_dataframe():
+def _make_job(schema=(), rows=()):
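+    # Build a QueryJob backed by a mocked connection that returns the given
+    # schema ((name, type, mode) triples) and row values, so tests can
+    # exercise to_dataframe() and friends without network access.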
from google.cloud.bigquery.job import QueryJob as target_class
begun_resource = _make_job_resource(job_type="query")
query_resource = {
"jobComplete": True,
"jobReference": begun_resource["jobReference"],
- "totalRows": "4",
+ "totalRows": str(len(rows)),
"schema": {
"fields": [
- {"name": "name", "type": "STRING", "mode": "NULLABLE"},
- {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
+ dict(name=field[0], type=field[1], mode=field[2]) for field in schema
]
},
}
- tabledata_resource = {
- "rows": [
- {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
- {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
- {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
- {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
- ]
- }
+ tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]}
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(
+ connection = make_connection(
begun_resource, query_resource, done_resource, tabledata_resource
)
client = _make_client(connection=connection)
- job = target_class.from_api_repr(begun_resource, client)
+ return target_class.from_api_repr(begun_resource, client)
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_to_dataframe():
+ job = _make_job(
+ (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")),
+ (
+ ("Phred Phlyntstone", "32"),
+ ("Bharney Rhubble", "33"),
+ ("Wylma Phlyntstone", "29"),
+ ("Bhettye Rhubble", "27"),
+ ),
+ )
df = job.to_dataframe(create_bqstorage_client=False)
assert isinstance(df, pandas.DataFrame)
@@ -457,7 +474,7 @@ def test_to_dataframe_ddl_query():
"jobReference": resource["jobReference"],
"schema": {"fields": []},
}
- connection = _make_connection(query_resource)
+ connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(resource, client)
@@ -481,7 +498,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg):
]
},
}
- connection = _make_connection(query_resource)
+ connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
@@ -525,7 +542,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression():
"totalRows": "4",
"schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]},
}
- connection = _make_connection(query_resource)
+ connection = make_connection(query_resource)
client = _make_client(connection=connection)
job = target_class.from_api_repr(resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
@@ -594,7 +611,7 @@ def test_to_dataframe_column_dtypes():
query_resource["rows"] = rows
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(
+ connection = make_connection(
begun_resource, query_resource, done_resource, query_resource
)
client = _make_client(connection=connection)
@@ -633,7 +650,7 @@ def test_to_dataframe_column_date_dtypes():
query_resource["rows"] = rows
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(
+ connection = make_connection(
begun_resource, query_resource, done_resource, query_resource
)
client = _make_client(connection=connection)
@@ -661,7 +678,7 @@ def test_to_dataframe_with_progress_bar(tqdm_mock):
}
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
- connection = _make_connection(
+ connection = make_connection(
begun_resource, query_resource, done_resource, query_resource, query_resource,
)
client = _make_client(connection=connection)
@@ -693,7 +710,7 @@ def test_to_dataframe_w_tqdm_pending():
{"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
]
- connection = _make_connection({})
+ connection = make_connection({})
client = _make_client(connection=connection)
job = target_class.from_api_repr(begun_resource, client)
@@ -748,7 +765,7 @@ def test_to_dataframe_w_tqdm():
{"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
]
- connection = _make_connection({})
+ connection = make_connection({})
client = _make_client(connection=connection)
job = target_class.from_api_repr(begun_resource, client)
@@ -802,7 +819,7 @@ def test_to_dataframe_w_tqdm_max_results():
]
rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}]
- connection = _make_connection({})
+ connection = make_connection({})
client = _make_client(connection=connection)
job = target_class.from_api_repr(begun_resource, client)
@@ -835,3 +852,94 @@ def test_to_dataframe_w_tqdm_max_results():
result_patch_tqdm.assert_called_with(
timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3
)
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(shapely is None, reason="Requires `shapely`")
+def test_to_dataframe_geography_as_object():
+ job = _make_job(
+ (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")),
+ (
+ ("Phred Phlyntstone", "Point(0 0)"),
+ ("Bharney Rhubble", "Point(0 1)"),
+ ("Wylma Phlyntstone", None),
+ ),
+ )
+ df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True)
+
+ assert isinstance(df, pandas.DataFrame)
+ assert len(df) == 3 # verify the number of rows
+ assert list(df) == ["name", "geog"] # verify the column names
+ assert [v.__class__.__name__ for v in df.geog] == [
+ "Point",
+ "Point",
+ "float",
+    ]  # float because pandas uses NaN for the missing value
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_to_geodataframe():
+ job = _make_job(
+ (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")),
+ (
+ ("Phred Phlyntstone", "Point(0 0)"),
+ ("Bharney Rhubble", "Point(0 1)"),
+ ("Wylma Phlyntstone", None),
+ ),
+ )
+ df = job.to_geodataframe(create_bqstorage_client=False)
+
+ assert isinstance(df, geopandas.GeoDataFrame)
+ assert len(df) == 3 # verify the number of rows
+ assert list(df) == ["name", "geog"] # verify the column names
+ assert [v.__class__.__name__ for v in df.geog] == [
+ "Point",
+ "Point",
+ "NoneType",
+    ]  # NoneType because GeoSeries uses None for missing geometries
+ assert isinstance(df.geog, geopandas.GeoSeries)
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+@mock.patch("google.cloud.bigquery.job.query.wait_for_query")
+def test_query_job_to_geodataframe_delegation(wait_for_query):
+ """
+ QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe.
+
+    This test just demonstrates the delegation; the full set of variations
+    is covered by the RowIterator tests.
+ """
+ import numpy
+
+ job = _make_job()
+ bqstorage_client = object()
+ dtypes = dict(xxx=numpy.dtype("int64"))
+ progress_bar_type = "normal"
+ create_bqstorage_client = False
+ date_as_object = False
+ max_results = 42
+ geography_column = "g"
+
+ df = job.to_geodataframe(
+ bqstorage_client=bqstorage_client,
+ dtypes=dtypes,
+ progress_bar_type=progress_bar_type,
+ create_bqstorage_client=create_bqstorage_client,
+ date_as_object=date_as_object,
+ max_results=max_results,
+ geography_column=geography_column,
+ )
+
+ wait_for_query.assert_called_once_with(
+ job, progress_bar_type, max_results=max_results
+ )
+ row_iterator = wait_for_query.return_value
+ row_iterator.to_geodataframe.assert_called_once_with(
+ bqstorage_client=bqstorage_client,
+ dtypes=dtypes,
+ progress_bar_type=progress_bar_type,
+ create_bqstorage_client=create_bqstorage_client,
+ date_as_object=date_as_object,
+ geography_column=geography_column,
+ )
+ assert df is row_iterator.to_geodataframe.return_value
diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py
index cf60cf749..035f04456 100644
--- a/tests/unit/test__helpers.py
+++ b/tests/unit/test__helpers.py
@@ -21,6 +21,12 @@
class TestBQStorageVersions(unittest.TestCase):
+ def tearDown(self):
+ from google.cloud.bigquery import _helpers
+
+ # Reset any cached versions since it may not match reality.
+ _helpers.BQ_STORAGE_VERSIONS._installed_version = None
+
def _object_under_test(self):
from google.cloud.bigquery import _helpers
@@ -52,6 +58,34 @@ def test_is_read_session_optional_false(self):
assert not versions.is_read_session_optional
+class TestPyarrowVersions(unittest.TestCase):
+ def tearDown(self):
+ from google.cloud.bigquery import _helpers
+
+ # Reset any cached versions since it may not match reality.
+ _helpers.PYARROW_VERSIONS._installed_version = None
+
+ def _object_under_test(self):
+ from google.cloud.bigquery import _helpers
+
+ return _helpers.PyarrowVersions()
+
+ def test_installed_version_returns_cached(self):
+ versions = self._object_under_test()
+ versions._installed_version = object()
+ assert versions.installed_version is versions._installed_version
+
+ def test_installed_version_returns_parsed_version(self):
+ versions = self._object_under_test()
+
+ with mock.patch("pyarrow.__version__", new="1.2.3"):
+ version = versions.installed_version
+
+ assert version.major == 1
+ assert version.minor == 2
+ assert version.micro == 3
+
+
class Test_not_null(unittest.TestCase):
def _call_fut(self, value, field):
from google.cloud.bigquery._helpers import _not_null
diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py
index 5a792527a..0140beb77 100644
--- a/tests/unit/test__pandas_helpers.py
+++ b/tests/unit/test__pandas_helpers.py
@@ -32,14 +32,19 @@
import pyarrow
import pyarrow.types
+
+try:
+ import geopandas
+except ImportError: # pragma: NO COVER
+ geopandas = None
+
import pytest
from google import api_core
+from google.cloud import bigquery_storage
from google.cloud.bigquery import _helpers
from google.cloud.bigquery import schema
-from google.cloud import bigquery_storage
-
PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0")
@@ -566,6 +571,57 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test):
assert roundtrip[3] is None
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_bq_to_arrow_array_w_geography_dtype(module_under_test):
+ from shapely import wkb, wkt
+
+ bq_field = schema.SchemaField("field_name", "GEOGRAPHY")
+
+ series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+    # The result is binary because we use the WKB format.
+ assert array.type == pyarrow.binary()
+ assert array.to_pylist() == [None, wkb.dumps(series[1])]
+
+    # An all-NA column has no geometries to encode, so the default string type is kept:
+ series = geopandas.GeoSeries([None, None])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+ assert array.type == pyarrow.string()
+ assert array.to_pylist() == list(series)
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test):
+ from shapely import wkb, wkt
+
+ bq_field = schema.SchemaField("field_name", "GEOGRAPHY")
+
+ series = pandas.Series([None, wkt.loads("point(0 0)")])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+    # The result is binary because we use the WKB format.
+ assert array.type == pyarrow.binary()
+ assert array.to_pylist() == [None, wkb.dumps(series[1])]
+
+    # An all-NA column has no geometries to encode, so the default string type is kept:
+ series = pandas.Series([None, None])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+ assert array.type == pyarrow.string()
+ assert array.to_pylist() == list(series)
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test):
+ from shapely import wkb, wkt
+
+ bq_field = schema.SchemaField("field_name", "GEOGRAPHY")
+
+ series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+    # The result is binary because we use the WKB format.
+ assert array.type == pyarrow.binary()
+ assert array.to_pylist() == list(series)
+
+
def test_bq_to_arrow_schema_w_unknown_type(module_under_test):
fields = (
schema.SchemaField("field1", "STRING"),
@@ -743,6 +799,41 @@ def test_dataframe_to_json_generator(module_under_test):
assert list(rows) == expected
+def test_dataframe_to_json_generator_repeated_field(module_under_test):
+ pytest.importorskip(
+ "pandas",
+ minversion=str(PANDAS_MINIUM_VERSION),
+ reason=(
+ f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` "
+ "which introduces pandas.NA"
+ ),
+ )
+
+ df_data = [
+ collections.OrderedDict(
+ [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")]
+ ),
+ collections.OrderedDict(
+ [
+ ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]),
+ ("not_repeated_col", "second"),
+ ]
+ ),
+ ]
+ dataframe = pandas.DataFrame(df_data)
+
+ rows = module_under_test.dataframe_to_json_generator(dataframe)
+
+ expected = [
+ {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"},
+ {
+ "repeated_col": ["a", "b", mock.sentinel.foo, "d"],
+ "not_repeated_col": "second",
+ },
+ ]
+ assert list(rows) == expected
+
+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_list_columns_and_indexes_with_named_index(module_under_test):
df_data = collections.OrderedDict(
@@ -804,7 +895,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):
def test_dataframe_to_bq_schema_dict_sequence(module_under_test):
df_data = collections.OrderedDict(
[
- ("str_column", [u"hello", u"world"]),
+ ("str_column", ["hello", "world"]),
("int_column", [42, 8]),
("bool_column", [True, False]),
]
@@ -988,7 +1079,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
]
dataframe = pandas.DataFrame(
- {"field01": [u"hello", u"world"], "field02": [True, False]}
+ {"field01": ["hello", "world"], "field02": [True, False]}
)
arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema)
@@ -1002,7 +1093,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch):
+def test_dataframe_to_parquet_w_extra_fields(module_under_test):
with pytest.raises(ValueError) as exc_context:
module_under_test.dataframe_to_parquet(
pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None
@@ -1013,7 +1104,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch):
+def test_dataframe_to_parquet_w_missing_fields(module_under_test):
with pytest.raises(ValueError) as exc_context:
module_under_test.dataframe_to_parquet(
pandas.DataFrame({"not_in_bq": [1, 2, 3]}), (), None
@@ -1046,8 +1137,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
dataframe = pandas.DataFrame(
data=[
- {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)},
- {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)},
+ {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)},
+ {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)},
]
)
@@ -1075,8 +1166,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
dataframe = pandas.DataFrame(
data=[
- {"struct_field": {"one": 2}, "status": u"FOO"},
- {"struct_field": {"two": u"222"}, "status": u"BAR"},
+ {"struct_field": {"one": 2}, "status": "FOO"},
+ {"struct_field": {"two": "222"}, "status": "BAR"},
]
)
@@ -1095,6 +1186,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
    assert "struct_field" in str(expected_warnings[0])
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_dataframe_to_bq_schema_geography(module_under_test):
+ from shapely import wkt
+
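+    # Both the active geometry column (geo1) and a plain column holding
+    # shapely objects (geo2) should be mapped to GEOGRAPHY.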
+ df = geopandas.GeoDataFrame(
+ pandas.DataFrame(
+ dict(
+ name=["foo", "bar"],
+ geo1=[None, None],
+ geo2=[None, wkt.loads("Point(1 1)")],
+ )
+ ),
+ geometry="geo1",
+ )
+ bq_schema = module_under_test.dataframe_to_bq_schema(df, [])
+ assert bq_schema == (
+ schema.SchemaField("name", "STRING"),
+ schema.SchemaField("geo1", "GEOGRAPHY"),
+ schema.SchemaField("geo2", "GEOGRAPHY"),
+ )
+
+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_augment_schema_type_detection_succeeds(module_under_test):
dataframe = pandas.DataFrame(
@@ -1107,7 +1220,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
"timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55),
"date_field": datetime.date(2005, 5, 31),
"bytes_field": b"some bytes",
- "string_field": u"some characters",
+ "string_field": "some characters",
"numeric_field": decimal.Decimal("123.456"),
"bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
}
@@ -1166,13 +1279,13 @@ def test_augment_schema_type_detection_fails(module_under_test):
dataframe = pandas.DataFrame(
data=[
{
- "status": u"FOO",
+ "status": "FOO",
"struct_field": {"one": 1},
- "struct_field_2": {"foo": u"123"},
+ "struct_field_2": {"foo": "123"},
},
{
- "status": u"BAR",
- "struct_field": {"two": u"111"},
+ "status": "BAR",
+ "struct_field": {"two": "111"},
"struct_field_2": {"bar": 27},
},
]
@@ -1206,7 +1319,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):
]
dataframe = pandas.DataFrame(
- {"field01": [u"hello", u"world"], "field02": [True, False]}
+ {"field01": ["hello", "world"], "field02": [True, False]}
)
write_table_patch = mock.patch.object(
@@ -1479,3 +1592,21 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test)
def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test):
dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {})
assert isinstance(dataframe, pandas.DataFrame)
+
+
+def test_bq_to_arrow_field_type_override(module_under_test):
+ # When loading pandas data, we may need to override the type
+ # decision based on data contents, because GEOGRAPHY data can be
+ # stored as either text or binary.
+
+ assert (
+ module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type
+ == pyarrow.string()
+ )
+
+ assert (
+ module_under_test.bq_to_arrow_field(
+ schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(),
+ ).type
+ == pyarrow.binary()
+ )
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 458798afa..4c6ec5b4f 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -52,6 +52,7 @@
from google.cloud import bigquery_storage
from google.cloud import bigquery_v2
from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
from tests.unit.helpers import make_connection
@@ -358,7 +359,7 @@ def test__get_query_results_miss_w_client_location(self):
method="GET",
path="/projects/PROJECT/queries/nothere",
query_params={"maxResults": 0, "location": self.LOCATION},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test__get_query_results_hit(self):
@@ -419,7 +420,9 @@ def test_get_service_account_email_w_alternate_project(self):
service_account_email = client.get_service_account_email(project=project)
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None)
+ conn.api_request.assert_called_once_with(
+ method="GET", path=path, timeout=DEFAULT_TIMEOUT
+ )
self.assertEqual(service_account_email, email)
def test_get_service_account_email_w_custom_retry(self):
@@ -687,7 +690,7 @@ def test_create_routine_w_conflict(self):
}
}
conn.api_request.assert_called_once_with(
- method="POST", path=path, data=resource, timeout=None,
+ method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT,
)
@unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`")
@@ -723,7 +726,7 @@ def test_span_status_is_set(self):
}
}
conn.api_request.assert_called_once_with(
- method="POST", path=path, data=resource, timeout=None,
+ method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT,
)
def test_create_routine_w_conflict_exists_ok(self):
@@ -759,11 +762,13 @@ def test_create_routine_w_conflict_exists_ok(self):
self.assertEqual(actual_routine.routine_id, "minimal_routine")
conn.api_request.assert_has_calls(
[
- mock.call(method="POST", path=path, data=resource, timeout=None,),
+ mock.call(
+ method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT,
+ ),
mock.call(
method="GET",
path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine",
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
]
)
@@ -839,7 +844,7 @@ def test_create_table_w_custom_property(self):
"newAlphaProperty": "unreleased property",
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got._properties["newAlphaProperty"], "unreleased property")
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -880,7 +885,7 @@ def test_create_table_w_encryption_configuration(self):
"labels": {},
"encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -916,7 +921,7 @@ def test_create_table_w_day_partition_and_expire(self):
"timePartitioning": {"type": "DAY", "expirationMs": "100"},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(table.time_partitioning.type_, "DAY")
self.assertEqual(table.time_partitioning.expiration_ms, 100)
@@ -997,7 +1002,7 @@ def test_create_table_w_schema_and_query(self):
"view": {"query": query, "useLegacySql": False},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
self.assertEqual(got.project, self.PROJECT)
@@ -1052,7 +1057,7 @@ def test_create_table_w_external(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
self.assertEqual(got.project, self.PROJECT)
@@ -1091,7 +1096,7 @@ def test_create_table_w_reference(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1125,7 +1130,7 @@ def test_create_table_w_fully_qualified_string(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1157,7 +1162,7 @@ def test_create_table_w_string(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1192,7 +1197,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_create_table_alreadyexists_w_exists_ok_true(self):
@@ -1235,9 +1240,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
- mock.call(method="GET", path=get_path, timeout=None),
+ mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT),
]
)
@@ -1310,7 +1315,7 @@ def test_get_model_w_string(self):
final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None)
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % path, timeout=None
+ method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT
)
self.assertEqual(got.model_id, self.MODEL_ID)
@@ -1419,7 +1424,7 @@ def test_get_table_sets_user_agent(self):
"User-Agent": expected_user_agent,
},
data=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertIn("my-application/1.2.3", expected_user_agent)
@@ -1717,7 +1722,6 @@ def test_update_dataset(self):
"access": ACCESS,
},
path="/" + PATH,
- headers=None,
timeout=7.5,
)
self.assertEqual(ds2.description, ds.description)
@@ -1761,8 +1765,7 @@ def test_update_dataset_w_custom_property(self):
method="PATCH",
data={"newAlphaProperty": "unreleased property"},
path=path,
- headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(dataset.dataset_id, self.DS_ID)
@@ -1820,7 +1823,7 @@ def test_update_model(self):
"labels": {"x": "y"},
}
conn.api_request.assert_called_once_with(
- method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5
+ method="PATCH", data=sent, path="/" + path, timeout=7.5
)
self.assertEqual(updated_model.model_id, model.model_id)
self.assertEqual(updated_model.description, model.description)
@@ -1893,7 +1896,6 @@ def test_update_routine(self):
method="PUT",
data=sent,
path="/projects/routines-project/datasets/test_routines/routines/updated_routine",
- headers=None,
timeout=7.5,
)
self.assertEqual(actual_routine.arguments, routine.arguments)
@@ -2001,7 +2003,7 @@ def test_update_table(self):
"labels": {"x": "y"},
}
conn.api_request.assert_called_once_with(
- method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5
+ method="PATCH", data=sent, path="/" + path, timeout=7.5
)
self.assertEqual(updated_table.description, table.description)
self.assertEqual(updated_table.friendly_name, table.friendly_name)
@@ -2051,8 +2053,7 @@ def test_update_table_w_custom_property(self):
method="PATCH",
path="/%s" % path,
data={"newAlphaProperty": "unreleased property"},
- headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(
updated_table._properties["newAlphaProperty"], "unreleased property"
@@ -2086,8 +2087,7 @@ def test_update_table_only_use_legacy_sql(self):
method="PATCH",
path="/%s" % path,
data={"view": {"useLegacySql": True}},
- headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
@@ -2184,8 +2184,7 @@ def test_update_table_w_query(self):
"expirationTime": str(_millis(exp_time)),
"schema": schema_resource,
},
- headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_update_table_w_schema_None(self):
@@ -2314,7 +2313,7 @@ def test_delete_job_metadata_not_found(self):
method="DELETE",
path="/projects/client-proj/jobs/my-job/delete",
query_params={"location": "client-loc"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_delete_job_metadata_with_id(self):
@@ -2328,7 +2327,7 @@ def test_delete_job_metadata_with_id(self):
method="DELETE",
path="/projects/param-proj/jobs/my-job/delete",
query_params={"location": "param-loc"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_delete_job_metadata_with_resource(self):
@@ -2353,7 +2352,7 @@ def test_delete_job_metadata_with_resource(self):
method="DELETE",
path="/projects/job-based-proj/jobs/query_job/delete",
query_params={"location": "us-east1"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_delete_model(self):
@@ -2408,7 +2407,9 @@ def test_delete_model_w_not_found_ok_false(self):
with self.assertRaises(google.api_core.exceptions.NotFound):
client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID))
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def test_delete_model_w_not_found_ok_true(self):
path = "/projects/{}/datasets/{}/models/{}".format(
@@ -2429,7 +2430,9 @@ def test_delete_model_w_not_found_ok_true(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def test_delete_routine(self):
from google.cloud.bigquery.routine import Routine
@@ -2483,7 +2486,7 @@ def test_delete_routine_w_not_found_ok_false(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
conn.api_request.assert_called_with(
- method="DELETE", path=path, timeout=None,
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT,
)
def test_delete_routine_w_not_found_ok_true(self):
@@ -2505,7 +2508,7 @@ def test_delete_routine_w_not_found_ok_true(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
conn.api_request.assert_called_with(
- method="DELETE", path=path, timeout=None,
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT,
)
def test_delete_table(self):
@@ -2569,7 +2572,9 @@ def test_delete_table_w_not_found_ok_false(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def test_delete_table_w_not_found_ok_true(self):
path = "/projects/{}/datasets/{}/tables/{}".format(
@@ -2591,7 +2596,9 @@ def test_delete_table_w_not_found_ok_true(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def _create_job_helper(self, job_config):
creds = _make_credentials()
@@ -2609,7 +2616,7 @@ def _create_job_helper(self, job_config):
method="POST",
path="/projects/%s/jobs" % self.PROJECT,
data=RESOURCE,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_create_job_load_config(self):
@@ -2758,7 +2765,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self):
method="POST",
path="/projects/PROJECT/jobs",
data=data_without_destination,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
)
@@ -2798,7 +2805,7 @@ def test_get_job_miss_w_explict_project(self):
method="GET",
path="/projects/OTHER_PROJECT/jobs/NONESUCH",
query_params={"projection": "full"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_get_job_miss_w_client_location(self):
@@ -2816,7 +2823,7 @@ def test_get_job_miss_w_client_location(self):
method="GET",
path="/projects/client-proj/jobs/NONESUCH",
query_params={"projection": "full", "location": "client-loc"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_get_job_hit_w_timeout(self):
@@ -2885,7 +2892,7 @@ def test_cancel_job_miss_w_explict_project(self):
method="POST",
path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel",
query_params={"projection": "full", "location": self.LOCATION},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_cancel_job_miss_w_client_location(self):
@@ -2904,7 +2911,7 @@ def test_cancel_job_miss_w_client_location(self):
method="POST",
path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel",
query_params={"projection": "full", "location": self.LOCATION},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_cancel_job_hit(self):
@@ -2940,7 +2947,7 @@ def test_cancel_job_hit(self):
method="POST",
path="/projects/job-based-proj/jobs/query_job/cancel",
query_params={"projection": "full", "location": "asia-northeast1"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_cancel_job_w_timeout(self):
@@ -3066,7 +3073,7 @@ def test_load_table_from_uri_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_load_table_from_uri_w_client_location(self):
@@ -3110,7 +3117,7 @@ def test_load_table_from_uri_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_load_table_from_uri_w_invalid_job_config(self):
@@ -3398,7 +3405,7 @@ def test_copy_table_w_multiple_sources(self):
method="POST",
path="/projects/%s/jobs" % self.PROJECT,
data=expected_resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertIsInstance(job, CopyJob)
self.assertIs(job._client, client)
@@ -3460,7 +3467,7 @@ def test_copy_table_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_copy_table_w_client_location(self):
@@ -3510,7 +3517,7 @@ def test_copy_table_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_copy_table_w_source_strings(self):
@@ -3603,7 +3610,7 @@ def test_copy_table_w_valid_job_config(self):
method="POST",
path="/projects/%s/jobs" % self.PROJECT,
data=RESOURCE,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertIsInstance(job._configuration, CopyJobConfig)
@@ -3709,7 +3716,7 @@ def test_extract_table_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_extract_table_w_client_location(self):
@@ -3753,7 +3760,7 @@ def test_extract_table_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_extract_table_generated_job_id(self):
@@ -3796,7 +3803,7 @@ def test_extract_table_generated_job_id(self):
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
self.assertIsInstance(req["data"]["jobReference"]["jobId"], str)
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
# Check the job resource.
self.assertIsInstance(job, ExtractJob)
@@ -3841,7 +3848,7 @@ def test_extract_table_w_destination_uris(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
# Check the job resource.
self.assertIsInstance(job, ExtractJob)
@@ -4011,7 +4018,7 @@ def test_query_defaults(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
sent = req["data"]
self.assertIsInstance(sent["jobReference"]["jobId"], str)
sent_config = sent["configuration"]["query"]
@@ -4064,7 +4071,7 @@ def test_query_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_w_explicit_job_config(self):
@@ -4120,7 +4127,10 @@ def test_query_w_explicit_job_config(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
# the original config object should not have been modified
@@ -4164,7 +4174,10 @@ def test_query_preserving_explicit_job_config(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
# the original config object should not have been modified
@@ -4216,7 +4229,10 @@ def test_query_preserving_explicit_default_job_config(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
# the original default config object should not have been modified
@@ -4301,7 +4317,10 @@ def test_query_w_explicit_job_config_override(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_w_client_default_config_no_incoming(self):
@@ -4342,7 +4361,10 @@ def test_query_w_client_default_config_no_incoming(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_w_invalid_default_job_config(self):
@@ -4387,7 +4409,7 @@ def test_query_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_detect_location(self):
@@ -4458,7 +4480,7 @@ def test_query_w_udf_resources(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
sent = req["data"]
self.assertIsInstance(sent["jobReference"]["jobId"], str)
sent_config = sent["configuration"]["query"]
@@ -4514,7 +4536,7 @@ def test_query_w_query_parameters(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
sent = req["data"]
self.assertEqual(sent["jobReference"]["jobId"], JOB)
sent_config = sent["configuration"]["query"]
@@ -4706,7 +4728,7 @@ def _row_data(row):
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/%s" % PATH)
self.assertEqual(req["data"], SENT)
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
def test_insert_rows_w_list_of_dictionaries(self):
import datetime
@@ -4774,7 +4796,7 @@ def _row_data(row):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_list_of_Rows(self):
@@ -4819,7 +4841,7 @@ def _row_data(row):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_skip_invalid_and_ignore_unknown(self):
@@ -4896,7 +4918,7 @@ def _row_data(row):
errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0]
)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_repeated_fields(self):
@@ -4997,7 +5019,7 @@ def test_insert_rows_w_repeated_fields(self):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None,
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_w_record_schema(self):
@@ -5063,7 +5085,7 @@ def test_insert_rows_w_record_schema(self):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_explicit_none_insert_ids(self):
@@ -5097,7 +5119,7 @@ def _row_data(row):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/{}".format(PATH), data=SENT, timeout=None,
+ method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_errors(self):
@@ -5181,7 +5203,7 @@ def test_insert_rows_w_numeric(self):
project, ds_id, table_id
),
data=sent,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@unittest.skipIf(pandas is None, "Requires `pandas`")
@@ -5373,7 +5395,10 @@ def test_insert_rows_from_dataframe_many_columns(self):
]
}
expected_call = mock.call(
- method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None
+ method="POST",
+ path=API_PATH,
+ data=EXPECTED_SENT_DATA,
+ timeout=DEFAULT_TIMEOUT,
)
actual_calls = conn.api_request.call_args_list
@@ -5426,7 +5451,10 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self):
actual_calls = conn.api_request.call_args_list
assert len(actual_calls) == 1
assert actual_calls[0] == mock.call(
- method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None
+ method="POST",
+ path=API_PATH,
+ data=EXPECTED_SENT_DATA,
+ timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_json_default_behavior(self):
@@ -5506,7 +5534,7 @@ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self):
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
data=expected_row_data,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_json_w_explicitly_disabled_insert_ids(self):
@@ -5536,7 +5564,7 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self):
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
data=expected_row_data,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_json_with_iterator_row_ids(self):
@@ -5563,7 +5591,7 @@ def test_insert_rows_json_with_iterator_row_ids(self):
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
data=expected_row_data,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_json_with_non_iterable_row_ids(self):
@@ -5616,7 +5644,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self):
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
data=expected,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_json_w_none_insert_ids_sequence(self):
@@ -5655,7 +5683,7 @@ def test_insert_rows_json_w_none_insert_ids_sequence(self):
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
data=expected_row_data,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_w_wrong_arg(self):
@@ -5850,7 +5878,7 @@ def test_list_rows_w_start_index_w_page_size(self):
"maxResults": 2,
"formatOptions.useInt64Timestamp": True,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
mock.call(
method="GET",
@@ -5860,7 +5888,7 @@ def test_list_rows_w_start_index_w_page_size(self):
"maxResults": 2,
"formatOptions.useInt64Timestamp": True,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
]
)
@@ -6011,7 +6039,7 @@ def test_list_rows_repeated_fields(self):
"selectedFields": "color,struct",
"formatOptions.useInt64Timestamp": True,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_list_rows_w_record_schema(self):
@@ -6081,7 +6109,7 @@ def test_list_rows_w_record_schema(self):
method="GET",
path="/%s" % PATH,
query_params={"formatOptions.useInt64Timestamp": True},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_list_rows_with_missing_schema(self):
@@ -6136,7 +6164,7 @@ def test_list_rows_with_missing_schema(self):
row_iter = client.list_rows(table)
conn.api_request.assert_called_once_with(
- method="GET", path=table_path, timeout=None
+ method="GET", path=table_path, timeout=DEFAULT_TIMEOUT
)
conn.api_request.reset_mock()
self.assertEqual(row_iter.total_rows, 2, msg=repr(table))
@@ -6146,7 +6174,7 @@ def test_list_rows_with_missing_schema(self):
method="GET",
path=tabledata_path,
query_params={"formatOptions.useInt64Timestamp": True},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(row_iter.total_rows, 3, msg=repr(table))
self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table))
@@ -6319,7 +6347,7 @@ def test_load_table_from_file_resumable(self):
file_obj,
self.EXPECTED_CONFIGURATION,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -6352,7 +6380,7 @@ def test_load_table_from_file_w_explicit_project(self):
file_obj,
expected_resource,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project="other-project",
)
@@ -6386,7 +6414,7 @@ def test_load_table_from_file_w_client_location(self):
file_obj,
expected_resource,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project="other-project",
)
@@ -6448,7 +6476,7 @@ def test_load_table_from_file_resumable_metadata(self):
file_obj,
expected_config,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -6477,7 +6505,7 @@ def test_load_table_from_file_multipart(self):
self.EXPECTED_CONFIGURATION,
file_obj_size,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.PROJECT,
)
@@ -6502,7 +6530,7 @@ def test_load_table_from_file_with_retries(self):
file_obj,
self.EXPECTED_CONFIGURATION,
num_retries,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -6539,7 +6567,7 @@ def test_load_table_from_file_with_readable_gzip(self):
gzip_file,
self.EXPECTED_CONFIGURATION,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -6661,7 +6689,7 @@ def test_load_table_from_dataframe(self):
location=None,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_file = load_table_from_file.mock_calls[0][1][1]
@@ -6718,7 +6746,7 @@ def test_load_table_from_dataframe_w_client_location(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_file = load_table_from_file.mock_calls[0][1][1]
@@ -6771,7 +6799,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -6826,7 +6854,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -6918,7 +6946,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -6983,7 +7011,7 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7037,7 +7065,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7083,7 +7111,7 @@ def test_load_table_from_dataframe_unknown_table(self):
location=None,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@unittest.skipIf(
@@ -7124,7 +7152,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7171,7 +7199,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7232,7 +7260,7 @@ def test_load_table_from_dataframe_struct_fields(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7306,7 +7334,7 @@ def test_load_table_from_dataframe_w_partial_schema(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7427,7 +7455,7 @@ def test_load_table_from_dataframe_w_nulls(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7493,7 +7521,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
location=None,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_file = load_table_from_file.mock_calls[0][1][1]
@@ -7531,7 +7559,7 @@ def test_load_table_from_json_basic_use(self):
location=client.location,
project=client.project,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7584,7 +7612,7 @@ def test_load_table_from_json_non_default_args(self):
location="EU",
project="project-x",
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7644,7 +7672,7 @@ def test_load_table_from_json_unicode_emoji_data_case(self):
location=client.location,
project=client.project,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_data_file = load_table_from_file.mock_calls[0][1][1]
@@ -7974,3 +8002,20 @@ def transmit_next_chunk(transport):
chunk_size = RU.call_args_list[0][0][1]
assert chunk_size == 100 * (1 << 20)
+
+
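+# With an explicit timeout, _call_api mirrors the value into the
+# X-Server-Timeout header; without one, no header is added (feature #927).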
+@pytest.mark.enable_add_server_timeout_header
+@pytest.mark.parametrize("headers", [None, {}])
+def test__call_api_add_server_timeout_w_timeout(client, headers):
+ client._connection = make_connection({})
+ client._call_api(None, method="GET", path="/", headers=headers, timeout=42)
+ client._connection.api_request.assert_called_with(
+ method="GET", path="/", timeout=42, headers={"X-Server-Timeout": "42"}
+ )
+
+
+@pytest.mark.enable_add_server_timeout_header
+def test__call_api_no_add_server_timeout_wo_timeout(client):
+ client._connection = make_connection({})
+ client._call_api(None, method="GET", path="/")
+ client._connection.api_request.assert_called_with(method="GET", path="/")
diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py
index d07aaed4f..67b21225d 100644
--- a/tests/unit/test_create_dataset.py
+++ b/tests/unit/test_create_dataset.py
@@ -15,6 +15,7 @@
from google.cloud.bigquery.dataset import Dataset, DatasetReference
from .helpers import make_connection, dataset_polymorphic, make_client
import google.cloud.bigquery.dataset
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
import mock
import pytest
@@ -111,7 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
"labels": LABELS,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -143,7 +144,7 @@ def test_create_dataset_w_custom_property(client, PROJECT, DS_ID):
"newAlphaProperty": "unreleased property",
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -176,7 +177,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LO
"labels": {},
"location": LOCATION,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -211,7 +212,7 @@ def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOC
"labels": {},
"location": OTHER_LOCATION,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -241,7 +242,7 @@ def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION):
"labels": {},
"location": LOCATION,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -271,7 +272,7 @@ def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION):
"labels": {},
"location": LOCATION,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -306,7 +307,7 @@ def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION):
"labels": {},
"location": LOCATION,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -356,8 +357,8 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION)
"labels": {},
"location": LOCATION,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
- mock.call(method="GET", path=get_path, timeout=None),
+ mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT),
]
)
diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py
index 4afc47b6c..84c74eeec 100644
--- a/tests/unit/test_dbapi__helpers.py
+++ b/tests/unit/test_dbapi__helpers.py
@@ -606,8 +606,8 @@ def test_complex_query_parameter_type_errors(type_, value, expect):
"parameters,parameter_types,expect",
[
(
-            [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))],
-            ["ARRAY<INT64>", "struct<name STRING, bdate DATE>"],
+            [[], dict(name="ch1", b_date=datetime.date(2021, 1, 1))],
+            ["ARRAY<INT64>", "struct<name STRING, b_date DATE>"],
[
{
"parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"},
@@ -617,13 +617,13 @@ def test_complex_query_parameter_type_errors(type_, value, expect):
"parameterType": {
"structTypes": [
{"name": "name", "type": {"type": "STRING"}},
- {"name": "bdate", "type": {"type": "DATE"}},
+ {"name": "b_date", "type": {"type": "DATE"}},
],
"type": "STRUCT",
},
"parameterValue": {
"structValues": {
- "bdate": {"value": "2021-01-01"},
+ "b_date": {"value": "2021-01-01"},
"name": {"value": "ch1"},
}
},
diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py
index f075bb6f7..07bce986f 100644
--- a/tests/unit/test_dbapi_cursor.py
+++ b/tests/unit/test_dbapi_cursor.py
@@ -785,6 +785,10 @@ def test__format_operation_no_placeholders(self):
"values(%%%%%(foo:INT64)s, %(bar)s)",
("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")),
),
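+        # Struct field names may contain underscores (issue #930).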
+        (
+            "values(%%%%%(foo:struct<x_y string>)s, %(bar)s)",
+            ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct<x_y string>")),
+        ),
        (
            "values(%%%%%(foo:struct<x string>)s, %(bar)s)",
            ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct<x string>")),
diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py
index 3a65e031c..b48beb147 100644
--- a/tests/unit/test_delete_dataset.py
+++ b/tests/unit/test_delete_dataset.py
@@ -14,6 +14,7 @@
from .helpers import make_connection, make_client, dataset_polymorphic
import google.api_core.exceptions
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
import pytest
@@ -40,7 +41,7 @@ def test_delete_dataset_delete_contents(
method="DELETE",
path="/%s" % PATH,
query_params={"deleteContents": "true"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -61,7 +62,7 @@ def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID):
client.delete_dataset(DS_ID)
conn.api_request.assert_called_with(
- method="DELETE", path=path, query_params={}, timeout=None
+ method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT
)
@@ -74,5 +75,5 @@ def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID):
)
client.delete_dataset(DS_ID, not_found_ok=True)
conn.api_request.assert_called_with(
- method="DELETE", path=path, query_params={}, timeout=None
+ method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT
)
diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py
index 7793a7ba6..6f0b55c5e 100644
--- a/tests/unit/test_list_datasets.py
+++ b/tests/unit/test_list_datasets.py
@@ -1,11 +1,11 @@
# Copyright 2021 Google LLC
-
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-
+#
# https://www.apache.org/licenses/LICENSE-2.0
-
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,6 +15,7 @@
import mock
import pytest
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
from .helpers import make_connection
@@ -65,7 +66,7 @@ def test_list_datasets_defaults(client, PROJECT, extra, query):
assert token == TOKEN
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % PATH, query_params=query, timeout=None
+ method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT
)
@@ -120,5 +121,5 @@ def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT):
"maxResults": 3,
"pageToken": TOKEN,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py
index f348be724..1fb40d446 100644
--- a/tests/unit/test_list_jobs.py
+++ b/tests/unit/test_list_jobs.py
@@ -1,11 +1,11 @@
# Copyright 2021 Google LLC
-
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-
+#
# https://www.apache.org/licenses/LICENSE-2.0
-
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,6 +17,7 @@
import mock
import pytest
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
from .helpers import make_connection
@@ -136,7 +137,7 @@ def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query):
method="GET",
path="/%s" % PATH,
query_params=dict({"projection": "full"}, **query),
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -185,7 +186,7 @@ def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID):
method="GET",
path="/%s" % PATH,
query_params={"projection": "full"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -220,7 +221,7 @@ def test_list_jobs_explicit_missing(client, PROJECT):
"allUsers": True,
"stateFilter": "done",
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -233,7 +234,7 @@ def test_list_jobs_w_project(client, PROJECT):
method="GET",
path="/projects/other-project/jobs",
query_params={"projection": "full"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -269,7 +270,7 @@ def test_list_jobs_w_time_filter(client, PROJECT):
"minCreationTime": "1",
"maxCreationTime": str(end_time_millis),
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@@ -286,6 +287,6 @@ def test_list_jobs_w_parent_job_filter(client, PROJECT):
method="GET",
path="/projects/%s/jobs" % PROJECT,
query_params={"projection": "full", "parentJobId": "parent-job-123"},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
conn.api_request.reset_mock()
diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py
index 4ede9a7dd..b14852338 100644
--- a/tests/unit/test_list_models.py
+++ b/tests/unit/test_list_models.py
@@ -1,20 +1,22 @@
# Copyright 2021 Google LLC
-
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-
+#
# https://www.apache.org/licenses/LICENSE-2.0
-
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from .helpers import make_connection, dataset_polymorphic
import pytest
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection, dataset_polymorphic
+
def test_list_models_empty_w_timeout(client, PROJECT, DS_ID):
path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID)
@@ -82,7 +84,7 @@ def test_list_models_defaults(
assert token == TOKEN
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % PATH, query_params=query, timeout=None
+ method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT
)
diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py
index a88540dd5..190612b44 100644
--- a/tests/unit/test_list_projects.py
+++ b/tests/unit/test_list_projects.py
@@ -1,11 +1,11 @@
# Copyright 2021 Google LLC
-
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-
+#
# https://www.apache.org/licenses/LICENSE-2.0
-
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,6 +15,7 @@
import mock
import pytest
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
from .helpers import make_connection
@@ -66,7 +67,7 @@ def test_list_projects_defaults(client, PROJECT, extra, query):
assert token == TOKEN
conn.api_request.assert_called_once_with(
- method="GET", path="/projects", query_params=query, timeout=None
+ method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT
)
@@ -115,5 +116,5 @@ def test_list_projects_explicit_response_missing_projects_key(client):
method="GET",
path="/projects",
query_params={"maxResults": 3, "pageToken": TOKEN},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py
index 069966542..80e62d6bd 100644
--- a/tests/unit/test_list_routines.py
+++ b/tests/unit/test_list_routines.py
@@ -1,20 +1,22 @@
# Copyright 2021 Google LLC
-
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-
+#
# https://www.apache.org/licenses/LICENSE-2.0
-
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from .helpers import make_connection, dataset_polymorphic
import pytest
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection, dataset_polymorphic
+
def test_list_routines_empty_w_timeout(client):
conn = client._connection = make_connection({})
@@ -85,7 +87,7 @@ def test_list_routines_defaults(
assert actual_token == token
conn.api_request.assert_called_once_with(
- method="GET", path=path, query_params=query, timeout=None
+ method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT
)
diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py
index 45d15bed3..8360f6605 100644
--- a/tests/unit/test_list_tables.py
+++ b/tests/unit/test_list_tables.py
@@ -1,21 +1,23 @@
# Copyright 2021 Google LLC
-
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-
+#
# https://www.apache.org/licenses/LICENSE-2.0
-
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from .helpers import make_connection, dataset_polymorphic
-import google.cloud.bigquery.dataset
import pytest
+import google.cloud.bigquery.dataset
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection, dataset_polymorphic
+
@dataset_polymorphic
def test_list_tables_empty_w_timeout(
@@ -89,7 +91,7 @@ def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_I
assert token == TOKEN
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % PATH, query_params={}, timeout=None
+ method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT
)
@@ -150,7 +152,7 @@ def test_list_tables_explicit(client, PROJECT, DS_ID):
method="GET",
path="/%s" % PATH,
query_params={"maxResults": 3, "pageToken": TOKEN},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py
index bb3a8d1fd..01c213e98 100644
--- a/tests/unit/test_magics.py
+++ b/tests/unit/test_magics.py
@@ -32,6 +32,7 @@
from google.cloud.bigquery import job
from google.cloud.bigquery import table
from google.cloud.bigquery.magics import magics
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
from tests.unit.helpers import make_connection
from test_utils.imports import maybe_fail_import
@@ -172,7 +173,7 @@ def test_context_with_default_connection():
method="POST",
path="/projects/project-from-env/jobs",
data=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
query_results_call = mock.call(
method="GET",
@@ -236,7 +237,7 @@ def test_context_with_custom_connection():
method="POST",
path="/projects/project-from-env/jobs",
data=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
query_results_call = mock.call(
method="GET",
@@ -593,7 +594,9 @@ def warning_match(warning):
assert client_info.user_agent == "ipython-" + IPython.__version__
query_job_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm"
+ bqstorage_client=bqstorage_instance_mock,
+ create_bqstorage_client=mock.ANY,
+ progress_bar_type="tqdm",
)
assert isinstance(return_value, pandas.DataFrame)
@@ -635,7 +638,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch):
bqstorage_mock.assert_not_called()
query_job_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=None, progress_bar_type="tqdm"
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type="tqdm",
)
assert isinstance(return_value, pandas.DataFrame)
@@ -689,7 +694,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result():
client_query_mock.return_value = query_job_mock
ip.run_cell_magic("bigquery", "--max_results=5", sql)
- query_job_mock.result.assert_called_with(max_results=5)
+ query_job_mock.result.assert_called_with(max_results=5)
+ query_job_mock.result.return_value.to_dataframe.assert_called_once_with(
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type=mock.ANY,
+ )
@pytest.mark.usefixtures("ipython_interactive")
@@ -858,7 +868,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client():
ip.run_cell_magic("bigquery", "--max_results=5", table_id)
row_iterator_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=bqstorage_instance_mock
+ bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY,
)
@@ -1175,7 +1185,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch):
bqstorage_mock.assert_not_called()
query_job_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type=magics.context.progress_bar_type,
)
assert isinstance(return_value, pandas.DataFrame)
diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py
index c7c25e036..e0a992f78 100644
--- a/tests/unit/test_retry.py
+++ b/tests/unit/test_retry.py
@@ -55,6 +55,18 @@ def test_w_unstructured_requests_chunked_encoding_error(self):
exc = requests.exceptions.ChunkedEncodingError()
self.assertTrue(self._call_fut(exc))
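+
+    # requests' timeout exceptions indicate transient connection problems,
+    # so all three variants should be classified as retryable.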
+ def test_w_unstructured_requests_connecttimeout(self):
+ exc = requests.exceptions.ConnectTimeout()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_unstructured_requests_readtimeout(self):
+ exc = requests.exceptions.ReadTimeout()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_unstructured_requests_timeout(self):
+ exc = requests.exceptions.Timeout()
+ self.assertTrue(self._call_fut(exc))
+
def test_w_auth_transporterror(self):
from google.auth.exceptions import TransportError
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index 0ff2c9258..ed9ed5d0f 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -14,6 +14,7 @@
import datetime
import logging
+import re
import time
import types
import unittest
@@ -36,6 +37,11 @@
except (ImportError, AttributeError): # pragma: NO COVER
pandas = None
+try:
+ import geopandas
+except (ImportError, AttributeError): # pragma: NO COVER
+ geopandas = None
+
try:
from tqdm import tqdm
except (ImportError, AttributeError): # pragma: NO COVER
@@ -1826,6 +1832,27 @@ def test_to_dataframe_iterable(self):
self.assertEqual(len(df), 0) # Verify the number of rows.
self.assertEqual(len(df.columns), 0)
+ @mock.patch("google.cloud.bigquery.table.geopandas", new=None)
+ def test_to_geodataframe_if_geopandas_is_none(self):
+ row_iterator = self._make_one()
+ with self.assertRaisesRegex(
+ ValueError,
+ re.escape(
+ "The geopandas library is not installed, please install "
+ "geopandas to use the to_geodataframe() function."
+ ),
+ ):
+ row_iterator.to_geodataframe(create_bqstorage_client=False)
+
+ @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+ def test_to_geodataframe(self):
+ row_iterator = self._make_one()
+ df = row_iterator.to_geodataframe(create_bqstorage_client=False)
+ self.assertIsInstance(df, geopandas.GeoDataFrame)
+ self.assertEqual(len(df), 0) # verify the number of rows
+ self.assertEqual(df.crs.srs, "EPSG:4326")
+ self.assertEqual(df.crs.name, "WGS 84")
+
class TestRowIterator(unittest.TestCase):
def _class_under_test(self):
@@ -1863,6 +1890,16 @@ def _make_one(
client, api_request, path, schema, table=table, **kwargs
)
+ def _make_one_from_data(self, schema=(), rows=()):
+ from google.cloud.bigquery.schema import SchemaField
+
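+        # Build rows in the REST API's JSON shape: each row is
+        # {"f": [{"v": value}, ...]}, one cell per schema field.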
+ schema = [SchemaField(*a) for a in schema]
+ rows = [{"f": [{"v": v} for v in row]} for row in rows]
+
+ path = "/foo"
+ api_request = mock.Mock(return_value={"rows": rows})
+ return self._make_one(_mock_client(), api_request, path, schema)
+
def test_constructor(self):
from google.cloud.bigquery.table import _item_to_row
from google.cloud.bigquery.table import _rows_page_start
@@ -3076,6 +3113,18 @@ def test_to_dataframe_error_if_pandas_is_none(self):
with self.assertRaises(ValueError):
row_iterator.to_dataframe()
+ @unittest.skipIf(pandas is None, "Requires `pandas`")
+ @mock.patch("google.cloud.bigquery.table.shapely", new=None)
+ def test_to_dataframe_error_if_shapely_is_none(self):
+ with self.assertRaisesRegex(
+ ValueError,
+ re.escape(
+ "The shapely library is not installed, please install "
+ "shapely to use the geography_as_object option."
+ ),
+ ):
+ self._make_one_from_data().to_dataframe(geography_as_object=True)
+
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_max_results_w_bqstorage_warning(self):
from google.cloud.bigquery.schema import SchemaField
@@ -3790,6 +3839,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self):
# Don't close the client if it was passed in.
bqstorage_client._transport.grpc_channel.close.assert_not_called()
+ @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+ def test_to_dataframe_geography_as_object(self):
+ row_iterator = self._make_one_from_data(
+ (("name", "STRING"), ("geog", "GEOGRAPHY")),
+ (
+ ("foo", "Point(0 0)"),
+ ("bar", None),
+ ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"),
+ ),
+ )
+ df = row_iterator.to_dataframe(
+ create_bqstorage_client=False, geography_as_object=True,
+ )
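+        # GEOGRAPHY cells come back as shapely objects; a SQL NULL becomes
+        # NaN, which is why the middle row's value is a float.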
+ self.assertIsInstance(df, pandas.DataFrame)
+ self.assertEqual(len(df), 3) # verify the number of rows
+ self.assertEqual(list(df), ["name", "geog"]) # verify the column names
+ self.assertEqual(df.name.dtype.name, "object")
+ self.assertEqual(df.geog.dtype.name, "object")
+ self.assertIsInstance(df.geog, pandas.Series)
+ self.assertEqual(
+ [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"]
+ )
+
+ @mock.patch("google.cloud.bigquery.table.geopandas", new=None)
+ def test_to_geodataframe_error_if_geopandas_is_none(self):
+ with self.assertRaisesRegex(
+ ValueError,
+ re.escape(
+ "The geopandas library is not installed, please install "
+ "geopandas to use the to_geodataframe() function."
+ ),
+ ):
+ self._make_one_from_data().to_geodataframe()
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe(self):
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("geog", "GEOGRAPHY")),
+            (
+                ("foo", "Point(0 0)"),
+                ("bar", None),
+                ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"),
+            ),
+        )
+        df = row_iterator.to_geodataframe(create_bqstorage_client=False)
+        self.assertIsInstance(df, geopandas.GeoDataFrame)
+        self.assertEqual(len(df), 3)  # verify the number of rows
+        self.assertEqual(list(df), ["name", "geog"])  # verify the column names
+        self.assertEqual(df.name.dtype.name, "object")
+        self.assertEqual(df.geog.dtype.name, "geometry")
+        self.assertIsInstance(df.geog, geopandas.GeoSeries)
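+        # Areas are computed planar, in square degrees: 0.0 for the point,
+        # nan for the missing value, and 0.5 for the right triangle.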
+        self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"])
+        self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"])
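+        # BigQuery GEOGRAPHY data is always expressed in WGS 84 (EPSG:4326).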
+        self.assertEqual(df.crs.srs, "EPSG:4326")
+        self.assertEqual(df.crs.name, "WGS 84")
+        self.assertEqual(df.geog.crs.srs, "EPSG:4326")
+        self.assertEqual(df.geog.crs.name, "WGS 84")
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_ambiguous_geog(self):
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), ()
+        )
+        with self.assertRaisesRegex(
+            ValueError,
+            re.escape(
+                "There is more than one GEOGRAPHY column in the result. "
+                "The geography_column argument must be used to specify which "
+                "one to use to create a GeoDataFrame"
+            ),
+        ):
+            row_iterator.to_geodataframe(create_bqstorage_client=False)
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_bad_geography_column(self):
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), ()
+        )
+        with self.assertRaisesRegex(
+            ValueError,
+            re.escape(
+                "The given geography column, xxx, doesn't name"
+                " a GEOGRAPHY column in the result."
+            ),
+        ):
+            row_iterator.to_geodataframe(
+                create_bqstorage_client=False, geography_column="xxx"
+            )
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_no_geog(self):
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("geog", "STRING")), ()
+        )
+        with self.assertRaisesRegex(
+            TypeError,
+            re.escape(
+                "There must be at least one GEOGRAPHY column"
+                " to create a GeoDataFrame"
+            ),
+        ):
+            row_iterator.to_geodataframe(create_bqstorage_client=False)
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_w_geography_column(self):
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")),
+            (
+                ("foo", "Point(0 0)", "Point(1 1)"),
+                ("bar", None, "Point(2 2)"),
+                ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"),
+            ),
+        )
+        df = row_iterator.to_geodataframe(
+            create_bqstorage_client=False, geography_column="geog"
+        )
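+        # Only the column named by geography_column becomes the geometry
+        # column; other GEOGRAPHY columns remain plain object columns.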
+        self.assertIsInstance(df, geopandas.GeoDataFrame)
+        self.assertEqual(len(df), 3)  # verify the number of rows
+        self.assertEqual(list(df), ["name", "geog", "geog2"])  # verify the column names
+        self.assertEqual(df.name.dtype.name, "object")
+        self.assertEqual(df.geog.dtype.name, "geometry")
+        self.assertEqual(df.geog2.dtype.name, "object")
+        self.assertIsInstance(df.geog, geopandas.GeoSeries)
+        self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"])
+        self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"])
+        self.assertEqual(
+            [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"]
+        )
+
+        # geog2 isn't a GeoSeries, but it contains geometries:
+        self.assertIsInstance(df.geog2, pandas.Series)
+        self.assertEqual(
+            [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"]
+        )
+        # and it can easily be converted to a GeoSeries:
+        self.assertEqual(
+            list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"]
+        )
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe")
+    def test_rowiterator_to_geodataframe_delegation(self, to_dataframe):
+        """
+        RowIterator.to_geodataframe simply delegates to RowIterator.to_dataframe.
+
+        This test demonstrates that delegation; we don't need to repeat all
+        the argument variations here, because they are already covered by
+        the to_dataframe tests.
+        """
+        import numpy
+        from shapely import wkt
+
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("g", "GEOGRAPHY"))
+        )
+        bqstorage_client = object()
+        dtypes = dict(xxx=numpy.dtype("int64"))
+        progress_bar_type = "normal"
+        create_bqstorage_client = False
+        date_as_object = False
+        geography_column = "g"
+
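+        # These argument values are placeholders: the mocked to_dataframe only
+        # records how it was called, so they just need to be distinguishable.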
+        to_dataframe.return_value = pandas.DataFrame(
+            dict(name=["foo"], g=[wkt.loads("point(0 0)")],)
+        )
+
+        df = row_iterator.to_geodataframe(
+            bqstorage_client=bqstorage_client,
+            dtypes=dtypes,
+            progress_bar_type=progress_bar_type,
+            create_bqstorage_client=create_bqstorage_client,
+            date_as_object=date_as_object,
+            geography_column=geography_column,
+        )
+
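+        # to_geodataframe forwards its arguments positionally and forces
+        # geography_as_object=True so it can build a GeoDataFrame from the
+        # resulting shapely objects.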
+        to_dataframe.assert_called_once_with(
+            bqstorage_client,
+            dtypes,
+            progress_bar_type,
+            create_bqstorage_client,
+            date_as_object,
+            geography_as_object=True,
+        )
+
+        self.assertIsInstance(df, geopandas.GeoDataFrame)
+        self.assertEqual(len(df), 1)  # verify the number of rows
+        self.assertEqual(list(df), ["name", "g"])  # verify the column names
+        self.assertEqual(df.name.dtype.name, "object")
+        self.assertEqual(df.g.dtype.name, "geometry")
+        self.assertIsInstance(df.g, geopandas.GeoSeries)
+        self.assertEqual(list(map(str, df.area)), ["0.0"])
+        self.assertEqual(list(map(str, df.g.area)), ["0.0"])
+        self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"])
+
 
 class TestPartitionRange(unittest.TestCase):
     def _get_target_class(self):