From 2cb1c210b00328161c3dcb09966bc849c99537aa Mon Sep 17 00:00:00 2001
From: Peter Lamut
Date: Thu, 9 Sep 2021 16:25:40 +0200
Subject: [PATCH] chore: sync v3 branch with main (#947)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* chore: protect v3.x.x branch (#816)

  In preparation for breaking changes.

  * force pattern to be a string
  * simplify branch name

* fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815)

  That warning should only be used when the BQ Storage client is explicitly
  passed in to RowIterator methods while `max_results` is also set.

* feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. (#817)

  PiperOrigin-RevId: 387137741
  Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb
  Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25

* chore: release 2.23.0 (#819)

  Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>

* chore(deps): update dependency google-cloud-bigquery to v2.23.0 (#820)

* fix: `insert_rows()` accepts float column values as strings again (#824)

* chore: release 2.23.1 (#825)

  Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>

* chore: add second protection rule for v3 branch (#828)

* chore(deps): update dependency google-cloud-bigquery to v2.23.1 (#827)

* test: retry getting rows after streaming them in `test_insert_rows_from_dataframe` (#832)

* chore(deps): update dependency pyarrow to v5 (#834)

* chore(deps): update dependency google-cloud-bigquery-storage to v2.6.2 (#795)

* deps: expand pyarrow pins to support 5.x releases (#833)

* chore: release 2.23.2 (#835)

  Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>

* chore(deps): update dependency google-auth-oauthlib to v0.4.5 (#839)

* chore(deps): update dependency google-cloud-bigquery to v2.23.2 (#838)

* chore(deps): update dependency google-cloud-testutils to v1 (#845)

* chore: require CODEOWNER review and up to date branches (#846)

  These two lines bring the rules on this repo in line with the defaults:
  https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50

* chore: add api-bigquery as a samples owner (#852)

* fix: increase default retry deadline to 10 minutes (#859)

  The backend API has a timeout of 4 minutes, so the default of 2 minutes was
  not allowing any retries to happen in some cases. See the usage sketch
  below. Fixes #853 🦕

* process: add yoshi-python to samples CODEOWNERS (#858)

  Closes #857.
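For the retry-deadline change in #859 above, a minimal usage sketch (mine, not from the original PR; it assumes the public `DEFAULT_RETRY` object in `google.cloud.bigquery.retry` and the `with_deadline` helper from `google-api-core`):

```python
# Sketch: the library-wide default retry deadline is now 10 minutes, so
# retries can still fire after the backend's ~4 minute request timeout.
# A caller can still narrow the retry window per call.
from google.cloud import bigquery
from google.cloud.bigquery.retry import DEFAULT_RETRY

client = bigquery.Client()

# Allow retries for up to 3 minutes instead of the 10 minute default.
shorter_retry = DEFAULT_RETRY.with_deadline(180.0)
rows = client.list_rows(
    "bigquery-public-data.samples.shakespeare",
    max_results=10,
    retry=shorter_retry,
)
for row in rows:
    print(row)
```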
* chore: release 2.23.3 (#860)

  Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
  Co-authored-by: Tim Swast

* chore(deps): update dependency google-cloud-bigquery to v2.23.3 (#866)

  This PR contains the following updates:

  | Package | Change |
  |---|---|
  | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.23.2` -> `==2.23.3` |

  Release notes: [`v2.23.3`](https://togithub.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3)

  This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com).

* feat: add support for transaction statistics (#849)

  * feat: add support for transaction statistics
  * Hoist transaction_info into base job class
  * Add versionadded directive to new property and class
  * Include new class in docs reference

* chore(deps): update dependency google-cloud-bigquery-storage to v2.6.3 (#863)

  This PR contains the following updates:

  | Package | Change |
  |---|---|
  | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.2` -> `==2.6.3` |

  Release notes: [`v2.6.3`](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3)

* chore: fix INSTALL_LIBRARY_FROM_SOURCE in noxfile.py (#869)

  Source-Link: https://github.com/googleapis/synthtool/commit/6252f2cd074c38f37b44abe5e96d128733eb1b61
  Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc

  Co-authored-by: Owl Bot

* feat: make the same `Table*` instances equal to each other (#867)

  * feat: make the same Table instances equal to each other
  * Table equality should ignore metadata differences
  * Compare instances through tableReference property
  * Make Table instances hashable
  * Make Table* classes interchangeable

  If these classes reference the same table, they are now considered equal.

* feat: support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor (#850)

  Follow-up to https://github.com/googleapis/python-bigquery/pull/840/files#r679880582

* feat: retry failed query jobs in `result()` (#837)

  Fixes #539 🦕

  Previously, we only retried failed API requests. Now, we retry failed jobs
  (according to the predicate of the `Retry` object passed to `job.result()`),
  as the sketch below illustrates.
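A minimal sketch of the new behavior (illustrative only, not from the PR; it assumes the `google.api_core.retry` predicate helpers):

```python
# Sketch: retry the *query job itself* (not just the HTTP request) when
# it fails with a transient error matching the Retry predicate.
from google.api_core.exceptions import InternalServerError, TooManyRequests
from google.api_core.retry import Retry, if_exception_type
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT COUNT(*) FROM `bigquery-public-data.samples.shakespeare`"
)

# Jobs failing with these (transient) errors are re-issued and waited on
# again, for up to 10 minutes overall.
retry = Retry(
    predicate=if_exception_type(InternalServerError, TooManyRequests),
    deadline=600.0,
)
rows = job.result(retry=retry)
print(list(rows))
```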
* fix: make unicode characters work well in load_table_from_json (#865)

  Co-authored-by: Tim Swast
  Co-authored-by: Tres Seaver

* chore: release 2.24.0 (#868)

  ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11)

  ### Features

  * add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e))
  * make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6))
  * retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc))
  * support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc))

  ### Bug Fixes

  * make unicode characters work well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78))

  This PR was generated with [Release Please](https://github.com/googleapis/release-please).

* chore(deps): update dependency google-cloud-bigquery to v2.24.0 (#873)

* test: refactor `list_rows` tests and add test for scalars (#829)

  * test: refactor `list_rows` tests and add test for scalars
  * fix JSON formatting
  * add TODO for INTERVAL Arrow support
  * format tests

* chore: drop mention of Python 2.7 from templates (#877)

  Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0
  Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803

  Co-authored-by: Owl Bot

* fix: remove pytz dependency and require pyarrow>=3.0.0 (#875)

  * fix: remove pytz dependency
  * 🦉 Updates from OwlBot
    (see https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md)
  * fix(deps): require pyarrow>=3.0.0
  * remove version check for pyarrow
  * require pyarrow 3.0 in pandas extra
  * remove _BIGNUMERIC_SUPPORT references from tests

  Co-authored-by: Owl Bot
  Co-authored-by: Dina Graves Portman
  Co-authored-by: Tim Swast

* chore: release 2.24.1 (#879)

  Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>

* chore(deps): update dependency google-cloud-bigquery to v2.24.1 (#887)

* feat: Support using GeoPandas for GEOGRAPHY columns (#848)

* test: Add test of datetime and time pandas load (#895)

* chore: release 2.25.0 (#898)

  Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
  Co-authored-by: Jim Fulton

* chore(deps): update dependency numpy to v1.21.2 (#899)

  * chore(deps): update dependency numpy to v1.21.2
  * Update samples/geography/requirements.txt

  Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com>

* chore(deps): update dependency google-cloud-core to v2 (#904)

* fix: use REST API in cell magic when requested (#892)

  Fixes #876. The `--use_rest_api` option did not work as expected and this
  commit fixes it.

* fix: populate default `timeout` and retry after client-side timeout (#896)

  This addresses internal issue 195337762, where query job creation can
  sometimes take longer than expected, and retrying the API call can be
  faster than waiting for the first query job request to fail.

  Fixes #889
  Towards https://github.com/googleapis/python-bigquery/issues/779 🦕

* chore(deps): update dependency google-cloud-bigquery to v2.25.0 (#907)

  This PR contains the following updates:

  | Package | Change |
  |---|---|
  | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.24.1` -> `==2.25.0` |

  Release notes for [`v2.25.0`](https://togithub.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0):

  ##### Features

  * Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.togithub.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13))

  ##### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13)

  ##### Bug Fixes

  * remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.togithub.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
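To illustrate the GeoPandas feature above, a minimal sketch (mine, not from the PR; it uses the `to_geodataframe()` method that #848 adds, per the `samples/geography/to_geodataframe.py` sample in this patch, and requires `geopandas` to be installed):

```python
# Sketch: fetch query results with a GEOGRAPHY column as a
# geopandas.GeoDataFrame instead of a plain pandas DataFrame.
from google.cloud import bigquery

client = bigquery.Client()
gdf = client.query(
    """
    SELECT created_date, complaint_description,
           ST_GEOGPOINT(longitude, latitude) AS location
    FROM `bigquery-public-data.austin_311.311_service_requests`
    LIMIT 10
    """
).to_geodataframe()

# `location` is now a geometry column; normal GeoPandas operations apply.
print(gdf.geometry.head())
```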
* chore(deps): update dependency pandas to v1.3.2 (#900)

  This PR contains the following updates:

  | Package | Change |
  |---|---|
  | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | `==1.1.5` -> `==1.3.2` |

  Release notes (each release repeats the same install/upgrade and
  issue-reporting instructions, omitted here):

  * [`v1.3.2`](https://togithub.com/pandas-dev/pandas/releases/v1.3.2): patch release with regression and bug fixes ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.2/whatsnew/v1.3.2.html))
  * [`v1.3.1`](https://togithub.com/pandas-dev/pandas/releases/v1.3.1): first patch release in the 1.3.x series, regression and bug fixes ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.1/whatsnew/v1.3.1.html))
  * [`v1.3.0`](https://togithub.com/pandas-dev/pandas/releases/v1.3.0): new features, bug fixes, and performance improvements ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.0/whatsnew/v1.3.0.html))
  * [`v1.2.5`](https://togithub.com/pandas-dev/pandas/releases/v1.2.5): patch release with regression fixes ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.5/whatsnew/v1.2.5.html))
  * [`v1.2.4`](https://togithub.com/pandas-dev/pandas/releases/v1.2.4): patch release with regression fixes ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.4/whatsnew/v1.2.4.html))
  * [`v1.2.3`](https://togithub.com/pandas-dev/pandas/releases/v1.2.3): patch release with regression fixes ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.3/whatsnew/v1.2.3.html))
  * [`v1.2.2`](https://togithub.com/pandas-dev/pandas/releases/v1.2.2): patch release with regression and bug fixes ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.2/whatsnew/v1.2.2.html))
  * [`v1.2.1`](https://togithub.com/pandas-dev/pandas/releases/v1.2.1): first patch release in the 1.2.x series, regression and bug fixes ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.1/whatsnew/v1.2.1.html))
  * [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0): new features, bug fixes, and performance improvements ([whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html))

* chore: group all renovate PRs together (#911)

  This excludes `renovate.json` from templated updates. If this works well, we
  can update the core templates (perhaps with a configuration option to
  `py_library`).

* chore: release 2.25.1 (#912)

  ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25)

  ### Bug Fixes

  * populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d))
  * use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627))

  This PR was generated with [Release Please](https://github.com/googleapis/release-please).
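A quick sketch of the cell-magic fix from 2.25.1 in use (illustrative only; `--use_rest_api` is the flag that #892 repairs, shown here as notebook input inside Python comments):

```python
# In a Jupyter notebook cell, load the BigQuery magics once:
#
#     %load_ext google.cloud.bigquery
#
# Then force the REST API (instead of the BigQuery Storage API) when
# downloading results, storing them in the `df` variable:
#
#     %%bigquery df --use_rest_api
#     SELECT word, word_count
#     FROM `bigquery-public-data.samples.shakespeare`
#     LIMIT 10
```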
* docs: update docstring for bigquery_create_routine sample (#883) (#917)

  Fixed language issues.

  Co-authored-by: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com>

* chore: migrate default branch to main (#910)

  * chore: migrate default branch to main
  * 🦉 Updates from OwlBot
    (see https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md)
  * Add owlbot replacements to persist changes
  * Manually apply new replacements from owlbot.py
  * Move temp replacement rules after s.move()

  Co-authored-by: Owl Bot

* chore: invalid docstrings broke docfx (#924)

* chore(deps): update all dependencies (#914)

  * chore(deps): update all dependencies
  * Python version modifiers for pyproj

  Co-authored-by: Tim Swast

* fix: error inserting DataFrame with REPEATED field (#925)

  Co-authored-by: Tim Swast

* chore(deps): update all dependencies (#926)

  This PR contains the following updates:

  | Package | Change |
  |---|---|
  | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.25.0` -> `==2.25.1` |
  | [google-cloud-testutils](https://togithub.com/googleapis/python-test-utils) | `==1.0.0` -> `==1.1.0` |
  | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.2` -> `==1.1.3` |
  | [importlib-metadata](https://togithub.com/python/importlib_metadata) | `==4.6.4` -> `==4.8.1` |
  | [pytest](https://docs.pytest.org/en/latest/) ([source](https://togithub.com/pytest-dev/pytest), [changelog](https://docs.pytest.org/en/stable/changelog.html)) | `==6.2.4` -> `==6.2.5` |
  | [typing-extensions](https://togithub.com/python/typing) | `==3.10.0.0` -> `==3.10.0.2` |

  Release notes:

  * googleapis/python-bigquery [`v2.25.1`](https://togithub.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1)
  * googleapis/python-test-utils [`v1.1.0`](https://togithub.com/googleapis/python-test-utils/compare/v1.0.0...v1.1.0)
  * googleapis/python-crc32c [`v1.1.3`](https://togithub.com/googleapis/python-crc32c/compare/v1.1.2...v1.1.3)
  * python/importlib_metadata [`v4.8.1`](https://togithub.com/python/importlib_metadata/compare/v4.8.0...v4.8.1):
    [#348](https://togithub.com/python/importlib_metadata/issues/348): Restored support for `EntryPoint` access by item, deprecating support
    in the process. Users are advised to use direct member access instead of
    item-based access:

    * `ep[0]` -> `ep.name`
    * `ep[1]` -> `ep.value`
    * `ep[2]` -> `ep.group`
    * `ep[:]` -> `ep.name, ep.value, ep.group`

  * python/importlib_metadata [`v4.8.0`](https://togithub.com/python/importlib_metadata/compare/v4.7.1...v4.8.0):
    [#337](https://togithub.com/python/importlib_metadata/issues/337): Rewrote `EntryPoint` as a simple class, still immutable and still with
    the attributes, but without any expectation for `namedtuple` functionality
    such as `_asdict`.
  * python/importlib_metadata [`v4.7.1`](https://togithub.com/python/importlib_metadata/compare/v4.7.0...v4.7.1):
    [#344](https://togithub.com/python/importlib_metadata/issues/344): Fixed regression in `packages_distributions` when neither top-level.txt
    nor a files manifest is present.
  * python/importlib_metadata [`v4.7.0`](https://togithub.com/python/importlib_metadata/compare/v4.6.4...v4.7.0):
    [#330](https://togithub.com/python/importlib_metadata/issues/330): In `packages_distributions`, now infer top-level names from `.files()`
    when a `top-level.txt` (Setuptools-specific metadata) is not present.
  * pytest-dev/pytest [`v6.2.5`](https://togithub.com/pytest-dev/pytest/compare/6.2.4...6.2.5)
  * python/typing [`v3.10.0.2`](https://togithub.com/python/typing/compare/3.10.0.1...3.10.0.2), [`v3.10.0.1`](https://togithub.com/python/typing/compare/3.10.0.0...3.10.0.1)

* fix: underscores weren't allowed in struct field names when passing parameters to the DB API (#930)

* chore: release 2.25.2 (#916)

  Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>

* chore(deps): update all dependencies (#928)

* fix: guard imports against unsupported pyarrow versions (#934)

  * fix: guard imports against unsupported pyarrow versions
  * add unit tests
  * fix pytype
  * second try at fixing pytype

* feat: set the X-Server-Timeout header when timeout is set (#927)

  Fixes #919 🦕

* chore: release 2.26.0 (#937)

  ## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01)

  ### Features

  * set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93))

  ### Bug Fixes

  * guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a))

  This PR was generated with [Release Please](https://github.com/googleapis/release-please).
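To show the 2.26.0 header feature in practice, a minimal sketch (mine, not from the release; only the `timeout=` argument is public API here, the header itself is set internally by the connection layer):

```python
# Sketch: a per-call `timeout` now also reaches the backend. Besides
# bounding the HTTP request client-side, the same value is sent in an
# X-Server-Timeout header so the server can give up on its side too.
from google.cloud import bigquery

client = bigquery.Client()

# 30-second client-side timeout; the request also carries
# "X-Server-Timeout: 30".
table = client.get_table(
    "bigquery-public-data.samples.shakespeare", timeout=30.0
)
print(table.num_rows)
```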
* chore(deps): update dependency google-cloud-bigquery to v2.26.0 (#938)

* chore: update system tests and samples to use an @google.com email address (#942)

  * chore: update system tests and samples to use an @google.com email address
  * Add group prefix
  * fixed access entry some more

* chore(python): rename default branch to main (#935)

  Source-Link: https://github.com/googleapis/synthtool/commit/5c0fa62eea9c33ebe61e582424b659eb264e1ba4
  Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d

  Co-authored-by: Owl Bot
  Co-authored-by: Tim Swast
  Co-authored-by: Peter Lamut
  Co-authored-by: Anthonios Partheniou

* chore: Reduce duplicated code between tests/unit and tests/unit/job (#940)

  * chore: Reduce duplicated code between tests/unit and tests/unit/job
  * reuse parent make_client

* test: fix routine DDL sample test exits too early (#932)

  Co-authored-by: Tres Seaver
  Co-authored-by: Tim Swast

* chore(deps): update all dependencies (#939)

  This PR contains the following updates:

  | Package | Change |
  |---|---|
  | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.3` -> `==2.7.0` |
  | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.3` -> `==1.1.4` |
  | [google-resumable-media](https://togithub.com/googleapis/google-resumable-media-python) | `==2.0.1` -> `==2.0.2` |

  Release notes:

  * googleapis/python-bigquery-storage [`v2.7.0`](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.3...v2.7.0)

    ##### Features

    * **v1beta2:** Align ReadRows timeout with other versions of the API ([#293](https://www.togithub.com/googleapis/python-bigquery-storage/issues/293)) ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a))

    ##### Documentation

    * **v1beta2:** Align session length with public documentation ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a))

    ##### [2.6.3](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3) (2021-08-06)

    ##### Bug Fixes

    * resume read stream on `Unknown` transport-layer exception ([#263](https://www.togithub.com/googleapis/python-bigquery-storage/issues/263)) ([127caa0](https://www.github.com/googleapis/python-bigquery-storage/commit/127caa06144b9cec04b23914b561be6a264bcb36))

    ##### [2.6.2](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.1...v2.6.2) (2021-07-28)

    ##### Bug Fixes

    * enable self signed jwt for grpc ([#249](https://www.togithub.com/googleapis/python-bigquery-storage/issues/249)) ([a7e8d91](https://www.github.com/googleapis/python-bigquery-storage/commit/a7e8d913fc3de67a3f38ecbd35af2f9d1a33aa8d))

    ##### Documentation

    * remove duplicate code samples ([#246](https://www.togithub.com/googleapis/python-bigquery-storage/issues/246)) ([303f273](https://www.github.com/googleapis/python-bigquery-storage/commit/303f2732ced38e491df92e965dd37bac24a61d2f))
    * add Samples section to CONTRIBUTING.rst ([#241](https://www.togithub.com/googleapis/python-bigquery-storage/issues/241)) ([5d02358](https://www.github.com/googleapis/python-bigquery-storage/commit/5d02358fbd397cafcc1169d829859fe2dd568645))

    ##### [2.6.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.0...v2.6.1) (2021-07-20)

    ##### Bug Fixes

    * **deps:** pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions ([#240](https://www.togithub.com/googleapis/python-bigquery-storage/issues/240)) ([8f848e1](https://www.github.com/googleapis/python-bigquery-storage/commit/8f848e18379085160492cdd2d12dc8de50a46c8e))

    ##### Documentation

    * pandas DataFrame samples are more standalone ([#224](https://www.togithub.com/googleapis/python-bigquery-storage/issues/224)) ([4026997](https://www.github.com/googleapis/python-bigquery-storage/commit/4026997d7a286b63ed2b969c0bd49de59635326d))

  * googleapis/python-crc32c [`v1.1.4`](https://togithub.com/googleapis/python-crc32c/compare/v1.1.3...v1.1.4)
  * googleapis/google-resumable-media-python [`v2.0.2`](https://togithub.com/googleapis/google-resumable-media-python/compare/v2.0.1...v2.0.2)
* Remove unneeded file

* Remove unneeded legacy pyarrow import in noxfile

Co-authored-by: Tim Swast
Co-authored-by: gcf-owl-bot[bot] <78513119+gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
Co-authored-by: WhiteSource Renovate
Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com>
Co-authored-by: Owl Bot
Co-authored-by: Jim Fulton
Co-authored-by: Grimmer
Co-authored-by: Tres Seaver
Co-authored-by: Dina Graves Portman
Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com>
Co-authored-by: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com>
Co-authored-by: Anthonios Partheniou
---
 .github/.OwlBot.lock.yaml                 |   2 +-
 .github/sync-repo-settings.yaml           |   8 +-
 .kokoro/build.sh                          |   2 +-
 .kokoro/test-samples-impl.sh              |   2 +-
 CHANGELOG.md                              |  47 ++++
 CONTRIBUTING.rst                          |  16 +-
 docs/conf.py                              |  12 +-
 docs/usage/pandas.rst                     |  14 ++
 google/cloud/bigquery/_helpers.py         |  24 ++
 google/cloud/bigquery/_pandas_helpers.py  |  79 ++++++-
 google/cloud/bigquery/client.py           | 150 +++++++-----
 google/cloud/bigquery/dbapi/_helpers.py   |   2 +-
 google/cloud/bigquery/dbapi/cursor.py     |   2 +-
 google/cloud/bigquery/job/copy_.py        |   2 +-
 google/cloud/bigquery/job/extract.py      |   2 +-
 google/cloud/bigquery/job/load.py         |   2 +-
 google/cloud/bigquery/job/query.py        | 119 +++++++++-
 google/cloud/bigquery/magics/magics.py    |  12 +-
 google/cloud/bigquery/retry.py            |   8 +
 google/cloud/bigquery/table.py            | 198 +++++++++++++++-
 google/cloud/bigquery/version.py          |   2 +-
 noxfile.py                                |   6 +-
 owlbot.py                                 |  76 +++++-
 renovate.json                             |   2 +-
 samples/create_routine.py                 |   2 +-
 samples/geography/requirements-test.txt   |   2 +-
 samples/geography/requirements.txt        |  51 +++-
 samples/geography/to_geodataframe.py      |  32 +++
 samples/geography/to_geodataframe_test.py |  25 ++
 samples/snippets/requirements-test.txt    |   4 +-
 samples/snippets/requirements.txt         |   8 +-
 samples/tests/test_routine_samples.py     |   1 -
 samples/update_dataset_access.py          |   4 +-
 setup.py                                  |   1 +
 testing/constraints-3.6.txt               |   2 +
 tests/system/test_client.py               |   3 -
 tests/system/test_pandas.py               | 207 ++++++++++++++++-
 tests/unit/conftest.py                    |  19 ++
 tests/unit/job/helpers.py                 |  22 +-
 tests/unit/job/test_base.py               |  13 +-
 tests/unit/job/test_copy.py               |  21 +-
 tests/unit/job/test_extract.py            |  21 +-
 tests/unit/job/test_load.py               |  41 ++--
 tests/unit/job/test_query.py              |  56 ++---
 tests/unit/job/test_query_pandas.py       | 168 +++++++++---
 tests/unit/test__helpers.py               |  34 +++
 tests/unit/test__pandas_helpers.py        | 163 +++++++++--
 tests/unit/test_client.py                 | 269 +++++++++++++---------
 tests/unit/test_create_dataset.py         |  19 +-
 tests/unit/test_dbapi__helpers.py         |   8 +-
 tests/unit/test_dbapi_cursor.py           |   4 +
 tests/unit/test_delete_dataset.py         |   7 +-
 tests/unit/test_list_datasets.py          |  11 +-
 tests/unit/test_list_jobs.py              |  19 +-
 tests/unit/test_list_models.py            |  12 +-
 tests/unit/test_list_projects.py          |  11 +-
 tests/unit/test_list_routines.py          |  12 +-
 tests/unit/test_list_tables.py            |  16 +-
 tests/unit/test_magics.py                 |  26 ++-
 tests/unit/test_retry.py                  |  12 +
 tests/unit/test_table.py                  | 242 +++++++++++++++++++
 61 files changed, 1903 insertions(+), 454 deletions(-)
 create mode 100644 samples/geography/to_geodataframe.py
 create mode 100644 samples/geography/to_geodataframe_test.py

diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
index 649877dc4..c07f148f0 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/.github/.OwlBot.lock.yaml
@@ -1,3 +1,3 @@
 docker:
   image: gcr.io/repo-automation-bots/owlbot-python:latest
-  digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc
+  digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d
diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
index 8634a3043..6572e5982 100644
--- a/.github/sync-repo-settings.yaml
+++ b/.github/sync-repo-settings.yaml
@@ -1,9 +1,9 @@
-# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings
-# Rules for master branch protection
+# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings
+# Rules for main branch protection
 branchProtectionRules:
 # Identifies the protection rule pattern. Name of the branch to be protected.
-# Defaults to `master`
-- pattern: master
+# Defaults to `main`
+- pattern: main
   requiresCodeOwnerReviews: true
   requiresStrictStatusChecks: true
   requiredStatusCheckContexts:
diff --git a/.kokoro/build.sh b/.kokoro/build.sh
index 302cc1e1a..4d6a1d0f6 100755
--- a/.kokoro/build.sh
+++ b/.kokoro/build.sh
@@ -41,7 +41,7 @@ python3 -m pip install --upgrade --quiet nox
 python3 -m nox --version
 
 # If this is a continuous build, send the test log to the FlakyBot.
-# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot.
+# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot.
 if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then
   cleanup() {
     chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot
diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh
index 311a8d54b..8a324c9c7 100755
--- a/.kokoro/test-samples-impl.sh
+++ b/.kokoro/test-samples-impl.sh
@@ -80,7 +80,7 @@ for file in samples/**/requirements.txt; do
     EXIT=$?
 
     # If this is a periodic build, send the test log to the FlakyBot.
-    # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot.
+    # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot.
    if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then
      chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot
      $KOKORO_GFILE_DIR/linux_amd64/flakybot
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 83b409015..5a3cb6bee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,53 @@
 
 [1]: https://pypi.org/project/google-cloud-bigquery/#history
 
+## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01)
+
+
+### Features
+
+* set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93))
+
+
+### Bug Fixes
+
+* guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a))
+
+### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31)
+
+
+### Bug Fixes
+
+* error inserting DataFrame with REPEATED field ([#925](https://www.github.com/googleapis/python-bigquery/issues/925)) ([656d2fa](https://www.github.com/googleapis/python-bigquery/commit/656d2fa6f870573a21235c83463752a2d084caba))
+* underscores weren't allowed in struct field names when passing parameters to the DB API ([#930](https://www.github.com/googleapis/python-bigquery/issues/930)) ([fcb0bc6](https://www.github.com/googleapis/python-bigquery/commit/fcb0bc68c972c2c98bb8542f54e9228308177ecb))
+
+
+### Documentation
+
+* update docstring for bigquery_create_routine sample ([#883](https://www.github.com/googleapis/python-bigquery/issues/883)) ([#917](https://www.github.com/googleapis/python-bigquery/issues/917)) ([e2d12b7](https://www.github.com/googleapis/python-bigquery/commit/e2d12b795ef2dc51b0ee36f1b3000edb1e64ce05))
+
+### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25)
+
+
+### Bug Fixes
+
+* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d))
+* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627))
+
+## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24)
+
+
+### Features
+
+* Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13))
+
+### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13)
+
+
+### Bug Fixes
+
+* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
+
 ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11)
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 2faf5aed3..8aecf9dd2 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -50,9 +50,9 @@ You'll have to create a development environment using a Git checkout:
 
    # Configure remotes such that you can pull changes from the googleapis/python-bigquery
    # repository into your local repository.
    $ git remote add upstream git@github.com:googleapis/python-bigquery.git
-   # fetch and merge changes from upstream into master
+   # fetch and merge changes from upstream into main
    $ git fetch upstream
-   $ git merge upstream/master
+   $ git merge upstream/main
 
 Now your local repo is set up such that you will push changes to your GitHub
 repo, from which you can submit a pull request.
@@ -110,12 +110,12 @@ Coding Style
   variables::
 
     export GOOGLE_CLOUD_TESTING_REMOTE="upstream"
-    export GOOGLE_CLOUD_TESTING_BRANCH="master"
+    export GOOGLE_CLOUD_TESTING_BRANCH="main"
 
   By doing this, you are specifying the location of the most up-to-date
-  version of ``python-bigquery``. The the suggested remote name ``upstream``
-  should point to the official ``googleapis`` checkout and the
-  the branch should be the main branch on that remote (``master``).
+  version of ``python-bigquery``. The
+  remote name ``upstream`` should point to the official ``googleapis``
+  checkout and the branch should be the default branch on that remote (``main``).
 
 - This repository contains configuration for the `pre-commit <https://pre-commit.com/>`__ tool,
   which automates checking
@@ -209,7 +209,7 @@ The `description on PyPI`_ for the project comes directly from the ``README``.
 Due to the reStructuredText (``rst``) parser used by
 PyPI, relative links which will work on GitHub (e.g. ``CONTRIBUTING.rst``
 instead of
-``https://github.com/googleapis/python-bigquery/blob/master/CONTRIBUTING.rst``)
+``https://github.com/googleapis/python-bigquery/blob/main/CONTRIBUTING.rst``)
 may cause problems creating links or rendering the description.
 
 .. _description on PyPI: https://pypi.org/project/google-cloud-bigquery
@@ -234,7 +234,7 @@ We support:
 
 Supported versions can be found in our ``noxfile.py`` `config`_.
 
-.. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py
+.. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py
 
 We also explicitly decided to support Python 3 beginning with version 3.6.
diff --git a/docs/conf.py b/docs/conf.py
index 09f7ea414..07e5d8c30 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -76,8 +76,8 @@
 # The encoding of source files.
 # source_encoding = 'utf-8-sig'
 
-# The master toctree document.
-master_doc = "index"
+# The root toctree document.
+root_doc = "index"
 
 # General information about the project.
 project = "google-cloud-bigquery"
@@ -281,7 +281,7 @@
 # author, documentclass [howto, manual, or own class]).
 latex_documents = [
     (
-        master_doc,
+        root_doc,
         "google-cloud-bigquery.tex",
         "google-cloud-bigquery Documentation",
         author,
@@ -316,7 +316,7 @@
 # (source start file, name, description, authors, manual section).
man_pages = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", [author], @@ -335,7 +335,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", author, @@ -366,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 40732a298..109259711 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -58,6 +58,20 @@ The following data types are used when creating a pandas DataFrame. - Int64 - +Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame +------------------------------------------------------------ + +`GeoPandas `_ adds geospatial analytics +capabilities to Pandas. To retrieve query results containing +GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: + +.. literalinclude:: ../samples/geography/to_geodataframe.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_geodataframe] + :end-before: [END bigquery_query_results_geodataframe] + + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index cb2ce40a3..e7f5bd59b 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -74,7 +74,31 @@ def is_read_session_optional(self) -> bool: return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION +class PyarrowVersions: + """Version comparisons for pyarrow package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pyarrow.""" + if self._installed_version is None: + import pyarrow + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pyarrow, "__version__", "0.0.0") + ) + + return self._installed_version + + BQ_STORAGE_VERSIONS = BQStorageVersions() +PYARROW_VERSIONS = PyarrowVersions() def _not_null(value, field): diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 88759bd18..a627f5226 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -25,10 +25,40 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +else: + import numpy import pyarrow import pyarrow.parquet +try: + # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array` + from shapely.geometry.base import BaseGeometry as _BaseGeometry +except ImportError: # pragma: NO COVER + # No shapely, use NoneType for _BaseGeometry as a placeholder. + _BaseGeometry = type(None) +else: + if pandas is not None: # pragma: NO COVER + + def _to_wkb(): + # Create a closure that: + # - Adds a not-null check. This allows the returned function to + # be used directly with apply, unlike `shapely.wkb.dumps`. + # - Avoids extra work done by `shapely.wkb.dumps` that we don't need.
+ # - Caches the WKBWriter (and write method lookup :) ) + # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. + from shapely.geos import WKBWriter, lgeos + + write = WKBWriter(lgeos).write + notnull = pandas.notnull + + def _to_wkb(v): + return write(v) if notnull(v) else v + + return _to_wkb + + _to_wkb = _to_wkb() + try: from google.cloud.bigquery_storage import ArrowSerializationOptions except ImportError: @@ -71,6 +101,7 @@ "uint8": "INTEGER", "uint16": "INTEGER", "uint32": "INTEGER", + "geometry": "GEOGRAPHY", } @@ -191,14 +222,16 @@ def bq_to_arrow_data_type(field): return data_type_constructor() -def bq_to_arrow_field(bq_field): +def bq_to_arrow_field(bq_field, array_type=None): """Return the Arrow field, corresponding to a given BigQuery column. Returns: None: if the Arrow type cannot be determined. """ arrow_type = bq_to_arrow_data_type(bq_field) - if arrow_type: + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) @@ -245,7 +278,24 @@ def bq_schema_to_nullsafe_pandas_dtypes( def bq_to_arrow_array(series, bq_field): - arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.field_type.upper() == "GEOGRAPHY": + arrow_type = None + first = _first_valid(series) + if first is not None: + if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry): + arrow_type = pyarrow.binary() + # Convert shapely geometry to WKB binary format: + series = series.apply(_to_wkb) + elif isinstance(first, bytes): + arrow_type = pyarrow.binary() + elif series.dtype.name == "geometry": + # We have a GeoSeries containing all nulls; convert it to a pandas series + series = pandas.Series(numpy.array(series)) + + if arrow_type is None: + arrow_type = bq_to_arrow_data_type(bq_field) + else: + arrow_type = bq_to_arrow_data_type(bq_field) field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" @@ -299,6 +349,12 @@ def list_columns_and_indexes(dataframe): return columns_and_indexes +def _first_valid(series): + first_valid_index = series.first_valid_index() + if first_valid_index is not None: + return series.at[first_valid_index] + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -339,6 +395,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if bq_type is None: + sample_data = _first_valid(dataframe[column]) + if ( + isinstance(sample_data, _BaseGeometry) + and sample_data is not None # Paranoia + ): + bq_type = "GEOGRAPHY" bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -463,11 +526,11 @@ def dataframe_to_arrow(dataframe, bq_schema): arrow_names = [] arrow_fields = [] for bq_field in bq_schema: - arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append( bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) ) + arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type)) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( @@ -791,7 +854,13 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values.
- if pandas.isna(value): + is_nan = pandas.isna(value) + + # isna() can also return an array-like of bools, but the latter's boolean + # value is ambiguous, hence an extra check. An array-like value is *not* + # considered a NaN, however. + if isinstance(is_nan, bool) and is_nan: continue output[column] = value + yield output diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e2863e6a5..a738dd0f3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -62,17 +62,24 @@ from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( + CopyJob, + CopyJobConfig, + ExtractJob, + ExtractJobConfig, + LoadJob, LoadJobConfig, QueryJob, QueryJobConfig, - CopyJobConfig, - ExtractJobConfig, ) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_JOB_RETRY, + DEFAULT_RETRY, + DEFAULT_TIMEOUT, +) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -107,6 +114,8 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 +TIMEOUT_HEADER = "X-Server-Timeout" + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -228,7 +237,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -275,7 +284,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -341,7 +350,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -512,7 +521,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -587,7 +596,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. 
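For context on the `dataframe_to_json_generator` change above: `pandas.isna` is elementwise for array-like inputs, so its result cannot be used directly in a boolean context. A minimal sketch (not part of the patch) of the behavior the new `isinstance` check guards against:

    import numpy
    import pandas

    print(pandas.isna(float("nan")))                      # True -- a plain bool
    print(pandas.isna(numpy.array([1.0, float("nan")])))  # [False  True]

    # `if pandas.isna(value):` raises "The truth value of an array ... is
    # ambiguous" for the array case; checking `isinstance(is_nan, bool)` first
    # treats array-like values as not-NaN and keeps them in the output row.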
@@ -642,7 +651,7 @@ def create_table( table: Union[str, Table, TableReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -698,23 +707,33 @@ def create_table( return self.get_table(table.reference, retry=retry) def _call_api( - self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs + self, + retry, + span_name=None, + span_attributes=None, + job_ref=None, + headers: Optional[Dict[str, str]] = None, + **kwargs, ): + kwargs = _add_server_timeout_header(headers, kwargs) call = functools.partial(self._connection.api_request, **kwargs) + if retry: call = retry(call) + if span_name is not None: with create_span( name=span_name, attributes=span_attributes, client=self, job_ref=job_ref ): return call() + return call() def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -758,7 +777,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -788,7 +807,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -821,7 +840,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -846,7 +865,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -889,7 +908,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. @@ -933,7 +952,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. @@ -975,7 +994,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1045,7 +1064,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1109,7 +1128,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. 
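The reworked `_call_api` above threads an optional `headers` mapping into the underlying `api_request` call so that a client-side `timeout` can be advertised to the backend as `X-Server-Timeout`. A self-contained sketch of that pattern, mirroring the `_add_server_timeout_header` helper added at the bottom of this file (the `api_request` function below is a hypothetical stand-in for `Connection.api_request`, used only to keep the sketch runnable):

    from typing import Dict, Optional

    TIMEOUT_HEADER = "X-Server-Timeout"

    def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs: dict) -> dict:
        # Copy a client-side timeout from kwargs into a server-side hint header.
        timeout = kwargs.get("timeout")
        if timeout is not None:
            if headers is None:
                headers = {}
            headers[TIMEOUT_HEADER] = str(timeout)
        if headers:
            kwargs["headers"] = headers
        return kwargs

    def api_request(method, path, headers=None, timeout=None):
        # Hypothetical stand-in for Connection.api_request.
        return {"method": method, "path": path, "headers": headers, "timeout": timeout}

    kwargs = _add_server_timeout_header(None, {"timeout": 300.0})
    print(api_request("GET", "/projects", **kwargs))
    # {'method': 'GET', 'path': '/projects',
    #  'headers': {'X-Server-Timeout': '300.0'}, 'timeout': 300.0}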
@@ -1183,7 +1202,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1249,7 +1268,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1326,7 +1345,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1403,7 +1422,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1478,7 +1497,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1537,7 +1556,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1587,12 +1606,12 @@ def delete_model( def delete_job_metadata( self, - job_id, - project=None, - location=None, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, + job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob], + project: Optional[str] = None, + location: Optional[str] = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = DEFAULT_TIMEOUT, + not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1600,26 +1619,20 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: Job or job identifier. Keyword Arguments: - project (Optional[str]): + project: ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): + location: Location where the job was run. Ignored if ``job_id`` is a job object. - retry (Optional[google.api_core.retry.Retry]): + retry: How to retry the RPC. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (Optional[bool]): + not_found_ok: Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. """ @@ -1660,7 +1673,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. 
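With the annotations above, `delete_job_metadata` documents that it accepts either a job ID string or a job object. A hedged usage sketch against the public client API (the job ID and location below are placeholders):

    from google.cloud import bigquery

    client = bigquery.Client()

    # Pass a plain job ID along with its location...
    client.delete_job_metadata("my-job-id", location="US", not_found_ok=True)

    # ...or pass a job object, whose project and location are used directly.
    job = client.get_job("my-job-id", location="US")
    client.delete_job_metadata(job, not_found_ok=True)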
@@ -1714,7 +1727,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1767,7 +1780,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1856,7 +1869,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1951,7 +1964,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. @@ -2025,7 +2038,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. @@ -2102,7 +2115,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2217,7 +2230,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2301,7 +2314,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2404,7 +2417,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2622,7 +2635,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2905,7 +2918,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3008,7 +3021,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. 
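All of these methods now default to `DEFAULT_TIMEOUT` (defined in the `retry.py` hunk below as five minutes per HTTP request) rather than `None`. Callers can still tune the budget per call; a sketch under the assumption of a placeholder table ID:

    from google.cloud import bigquery

    client = bigquery.Client()

    # 30 seconds per HTTP request, and at most two minutes overall:
    # the retry deadline caps the total wait across retried requests.
    table = client.get_table(
        "my-project.my_dataset.my_table",
        timeout=30.0,
        retry=bigquery.DEFAULT_RETRY.with_deadline(120.0),
    )
    print(table.num_rows)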
@@ -3106,7 +3119,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3388,7 +3401,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3523,7 +3536,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3573,7 +3586,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3685,7 +3698,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See @@ -3988,3 +4001,16 @@ def _get_upload_headers(user_agent): "User-Agent": user_agent, "content-type": "application/json", } + + +def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs): + timeout = kwargs.get("timeout") + if timeout is not None: + if headers is None: + headers = {} + headers[TIMEOUT_HEADER] = str(timeout) + + if headers: + kwargs["headers"] = headers + + return kwargs diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 9c134b47c..72e711bcf 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -173,7 +173,7 @@ def _parse_type( \s* (ARRAY|STRUCT|RECORD) # Type \s* - <([A-Z0-9<> ,()]+)> # Subtype(s) + <([A-Z0-9_<> ,()]+)> # Subtype(s) \s*$ """, re.IGNORECASE | re.VERBOSE, diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 587598d5f..b1239ff57 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -494,7 +494,7 @@ def _extract_types( ([^:)]*) # name (?:: # ':' introduces type ( # start of type group - [a-zA-Z0-9<>, ]+ # First part, no parens + [a-zA-Z0-9_<>, ]+ # First part, no parens (?: # start sets of parens + non-paren text \([0-9 ,]+\) # comma-separated groups of digits in parens diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index c6ee98944..f0dd3d668 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ -240,7 +240,7 @@ def to_api_repr(self): def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py index 3373bcdef..52aa036c9 100644 --- a/google/cloud/bigquery/job/extract.py +++ b/google/cloud/bigquery/job/extract.py @@ -244,7 +244,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation - .. note: + .. 
note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index aee055c1c..b12c3e621 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -800,7 +800,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index ca3ffb2bf..c07daec99 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -53,6 +53,7 @@ # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. import pandas + import geopandas import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage @@ -1482,6 +1483,7 @@ def to_dataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, max_results: Optional[int] = None, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1533,13 +1535,27 @@ def to_dataframe( .. versionadded:: 2.21.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + pandas.DataFrame: + A :class:`~pandas.DataFrame` populated with row data + and column headers from the query results. The column + headers are derived from the destination table's + schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( @@ -1548,6 +1564,101 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, + geography_as_object=geography_as_object, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + max_results: Optional[int] = None, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Return a GeoPandas GeoDataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. 
+ + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names to pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + .. versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + .. versionadded:: 1.26.0 + + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + .. versionadded:: 2.21.0 + + geography_column (Optional[str]): + If there is more than one GEOGRAPHY column, + identifies which one to use to construct a GeoPandas + GeoDataFrame. This option can be omitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. 
versionadded:: 2.24.0 + """ + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) + return query_result.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, ) def __iter__(self): diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index dce911232..60670167e 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -671,7 +671,9 @@ def _cell_magic(line, query): _handle_error(ex, args.destination_var) return - result = rows.to_dataframe(bqstorage_client=bqstorage_client) + result = rows.to_dataframe( + bqstorage_client=bqstorage_client, create_bqstorage_client=False, + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return @@ -728,11 +730,15 @@ def _cell_magic(line, query): if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) else: result = query_job.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) if args.destination_var: diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index e9286055c..830582322 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -29,6 +29,7 @@ exceptions.BadGateway, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, + requests.exceptions.Timeout, auth_exceptions.TransportError, ) @@ -59,6 +60,13 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +DEFAULT_TIMEOUT = 5.0 * 60.0 +"""The default API timeout. + +This is the time to wait per request. To adjust the total wait time, set a +deadline on the retry object. +""" + job_retry_reasons = "rateLimitExceeded", "backendError" diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 7387f58c1..c44289324 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -31,6 +31,20 @@ import pyarrow +try: + import geopandas +except ImportError: + geopandas = None +else: + _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" + +try: + import shapely.geos +except ImportError: + shapely = None +else: + _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator @@ -45,17 +59,25 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER - from google.cloud import bigquery_storage - # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas + import geopandas + from google.cloud import bigquery_storage _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_GEOPANDAS_ERROR = ( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+) +_NO_SHAPELY_ERROR = ( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." +) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -1726,6 +1748,7 @@ def to_arrow( .. versionadded:: 1.17.0 """ self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1850,6 +1873,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1904,6 +1928,13 @@ .. versionadded:: 1.26.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1912,11 +1943,18 @@ Raises: ValueError: - If the :mod:`pandas` library cannot be imported. + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if geography_as_object and shapely is None: + raise ValueError(_NO_SHAPELY_ERROR) + if dtypes is None: dtypes = {} @@ -1966,8 +2004,136 @@ for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + if geography_as_object: + for field in self.schema: + if field.field_type.upper() == "GEOGRAPHY": + df[field.name] = df[field.name].dropna().apply(_read_wkt) + return df + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_geodataframe() + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Create a GeoPandas GeoDataFrame by loading all pages of a query. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names to pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`.
+ ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + geography_column (Optional[str]): + If there is more than one GEOGRAPHY column, + identifies which one to use to construct a geopandas + GeoDataFrame. This option can be omitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. versionadded:: 2.24.0 + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + + geography_columns = set( + field.name + for field in self.schema + if field.field_type.upper() == "GEOGRAPHY" + ) + if not geography_columns: + raise TypeError( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ) + + if geography_column: + if geography_column not in geography_columns: + raise ValueError( + f"The given geography column, {geography_column}, doesn't name" + f" a GEOGRAPHY column in the result." + ) + elif len(geography_columns) == 1: + [geography_column] = geography_columns + else: + raise ValueError( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ) + + df = self.to_dataframe( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + return geopandas.GeoDataFrame( + df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column + ) + class _EmptyRowIterator(RowIterator): """An empty row iterator. @@ -2018,6 +2184,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, + geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2035,6 +2202,31 @@ raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_geodataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Create an empty GeoDataFrame. + + Args: + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + date_as_object (bool): Ignored. Added for compatibility with RowIterator. + + Returns: + geopandas.GeoDataFrame: An empty :class:`geopandas.GeoDataFrame`.
+ """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 84f6b4643..1f7d79ab9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.24.0" +__version__ = "2.26.0" diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..dbf6a163c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -94,7 +94,7 @@ def unit(session): default(session) -@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) def unit_noextras(session): """Run the unit test suite.""" default(session, install_extras=False) @@ -160,10 +160,6 @@ def snippets(session): if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - # Sanity check: Only run snippets tests if the environment variable is set. - if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): - session.skip("Credentials must be set via environment variable.") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) diff --git a/owlbot.py b/owlbot.py index 09845480a..09aa8ca6f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -63,7 +63,7 @@ s.replace( library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", r"type_ ", - "type " + "type ", ) s.move( @@ -78,8 +78,8 @@ "noxfile.py", "setup.py", f"scripts/fixup_bigquery_{library.name}_keywords.py", - f"google/cloud/bigquery/__init__.py", - f"google/cloud/bigquery/py.typed", + "google/cloud/bigquery/__init__.py", + "google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, # thus there's no point in generating it and its tests. f"google/cloud/bigquery_{library.name}/services/**", @@ -97,6 +97,10 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies={ + "pandas": "http://pandas.pydata.org/pandas-docs/dev", + "geopandas": "https://geopandas.org/", + }, ) # BigQuery has a custom multiprocessing note @@ -109,7 +113,11 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ] + # Group all renovate PRs together. If this works well, remove this and + # update the shared templates (possibly with configuration option to + # py_library.) + "renovate.json", + ], ) # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- s.replace( "docs/conf.py", r'\{"members": True\}', '{"members": True, "inherited-members": True}', ) # Tell Sphinx to ignore autogenerated docs files.
s.replace( "docs/conf.py", r'"samples/snippets/README\.rst",', - '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) # ---------------------------------------------------------------------------- @@ -136,13 +144,14 @@ # ---------------------------------------------------------------------------- # Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") +s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") # Add pytype config to setup.cfg s.replace( "setup.cfg", r"universal = 1", - textwrap.dedent(""" \g<0> + textwrap.dedent( + """ \\g<0> [pytype] python_version = 3.8 @@ -156,7 +165,56 @@ # There's some issue with finding some pyi files, thus disabling. # The issue https://github.com/google/pytype/issues/150 is closed, but the # error still occurs for some reason. - pyi-error""") + pyi-error""" + ), +) + + +# Remove the replacements below once +# https://github.com/googleapis/synthtool/pull/1188 is merged + +# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files +s.replace( + ".kokoro/*.sh", + "repo-automation-bots/tree/master", + "repo-automation-bots/tree/main", +) + +# Customize CONTRIBUTING.rst to replace master with main +s.replace( + "CONTRIBUTING.rst", + "fetch and merge changes from upstream into master", + "fetch and merge changes from upstream into main", +) + +s.replace( + "CONTRIBUTING.rst", "git merge upstream/master", "git merge upstream/main", +) + +s.replace( + "CONTRIBUTING.rst", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""", +) + +s.replace( + "CONTRIBUTING.rst", r"remote \(``master``\)", "remote (``main``)", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/CONTRIBUTING.rst", "blob/main/CONTRIBUTING.rst", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/noxfile.py", "blob/main/noxfile.py", +) + +s.replace( + "docs/conf.py", "master_doc", "root_doc", +) + +s.replace( + "docs/conf.py", "# The master toctree document.", "# The root toctree document.", ) s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/renovate.json b/renovate.json index c04895563..713c60bb4 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,6 @@ { "extends": [ - "config:base", ":preserveSemverRanges" + "config:base", "group:all", ":preserveSemverRanges" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { diff --git a/samples/create_routine.py b/samples/create_routine.py index 012c7927a..1cb4a80b4 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -22,7 +22,7 @@ def create_routine(routine_id): # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Choose a fully-qualified ID for the routine. + # TODO(developer): Choose a fully qualified ID for the routine. 
# routine_id = "my-project.my_dataset.my_routine" routine = bigquery.Routine( diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index b0cf76724..5d836a5c5 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.4 +pytest==6.2.5 mock==4.0.3 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index dfee339d4..8fb578018 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,51 @@ +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +dataclasses==0.6; python_version < '3.7' +Fiona==1.8.20 geojson==2.5.0 -google-cloud-bigquery==2.24.0 -google-cloud-bigquery-storage==2.6.3 +geopandas==0.9.0 +google-api-core==2.0.1 +google-auth==2.0.2 +google-cloud-bigquery==2.26.0 +google-cloud-bigquery-storage==2.7.0 +google-cloud-core==2.0.0 +google-crc32c==1.1.2 +google-resumable-media==2.0.2 +googleapis-common-protos==1.53.0 +grpcio==1.39.0 +idna==3.2 +importlib-metadata==4.8.1 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5; python_version < "3.7" +numpy==1.21.2; python_version > "3.6" +packaging==21.0 +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1; python_version < "3.7" +pyproj==3.1.0; python_version > "3.6" +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 Shapely==1.7.1 +six==1.16.0 +typing-extensions==3.10.0.2 +typing-inspect==0.7.1 +urllib3==1.26.6 +zipp==3.5.0 diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py new file mode 100644 index 000000000..fa8073fef --- /dev/null +++ b/samples/geography/to_geodataframe.py @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +client = bigquery.Client() + + +def get_austin_service_requests_as_geography(): + # [START bigquery_query_results_geodataframe] + + sql = """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + return df diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py new file mode 100644 index 000000000..7a2ba6937 --- /dev/null +++ b/samples/geography/to_geodataframe_test.py @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .to_geodataframe import get_austin_service_requests_as_geography + + +def test_get_austin_service_requests_as_geography(): + geopandas = pytest.importorskip("geopandas") + df = get_austin_service_requests_as_geography() + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index b8dee50d0..caa48813a 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.0.0 -pytest==6.2.4 +google-cloud-testutils==1.1.0 +pytest==6.2.5 mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 264899dff..e096af157 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,12 +1,12 @@ -google-cloud-bigquery==2.24.0 -google-cloud-bigquery-storage==2.6.3 -google-auth-oauthlib==0.4.5 +google-cloud-bigquery==2.26.0 +google-cloud-bigquery-storage==2.7.0 +google-auth-oauthlib==0.4.6 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.2.0; python_version >= '3.7' +pandas==1.3.2; python_version >= '3.7' pyarrow==5.0.0 pytz==2021.1 diff --git a/samples/tests/test_routine_samples.py b/samples/tests/test_routine_samples.py index 59ec1fae9..c1b0bb5a7 100644 --- a/samples/tests/test_routine_samples.py +++ b/samples/tests/test_routine_samples.py @@ -32,7 +32,6 @@ def test_create_routine_ddl(capsys, random_routine_id, client): out, err = capsys.readouterr() assert "Created routine {}".format(random_routine_id) in out - return routine assert routine.type_ == "SCALAR_FUNCTION" assert routine.language == "SQL" expected_arguments = [ diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py index 6e844cc90..a5c2670e7 100644 --- a/samples/update_dataset_access.py +++ b/samples/update_dataset_access.py @@ -28,8 +28,8 @@ def update_dataset_access(dataset_id): entry = bigquery.AccessEntry( role="READER", - entity_type="userByEmail", - entity_id="sample.bigquery.dev@gmail.com", + entity_type="groupByEmail", + entity_id="cloud-developer-relations@google.com", ) entries = list(dataset.access_entries) diff --git a/setup.py b/setup.py index 6fa619d37..f1464e77a 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], "pandas": ["pandas>=1.0.0"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index bf1f89f58..6e27172b2 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have 
foo==1.14.0 +geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 @@ -18,5 +19,6 @@ proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 +Shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c6896da14..4b9868f10 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2351,9 +2351,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - def _fetch_dataframe(self, query): - return Config.CLIENT.query(query).result().to_dataframe() - def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 2bd496e83..075d3b680 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -278,8 +278,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. # See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 @@ -287,7 +285,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("dt_col", "DATETIME"), bigquery.SchemaField("float_col", "FLOAT"), bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), @@ -312,14 +310,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), ("float_col", [float("-inf"), float("nan"), float("inf")]), ( "geo_col", @@ -899,3 +897,190 @@ def test_list_rows_nullable_scalars_extreme_dtypes( # pandas uses Python string and bytes objects. 
assert df.dtypes["bytes_col"].name == "object" assert df.dtypes["string_col"].name == "object" + + +def test_upload_time_and_datetime_56(bigquery_client, dataset_id): + df = pandas.DataFrame( + dict( + dt=[ + datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime( + 2020, + 1, + 8, + 8, + 0, + 0, + tzinfo=datetime.timezone(datetime.timedelta(hours=-7)), + ), + ], + t=[datetime.time(0, 0, 10, 100001), None], + ) + ) + table = f"{dataset_id}.test_upload_time_and_datetime" + bigquery_client.load_table_from_dataframe(df, table).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [ + datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc), + datetime.time(0, 0, 10, 100001), + ], + [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None], + ] + + from google.cloud.bigquery import job, schema + + table = f"{dataset_id}.test_upload_time_and_datetime_dt" + config = job.LoadJobConfig( + schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")] + ) + + bigquery_client.load_table_from_dataframe(df, table, job_config=config).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)], + [datetime.datetime(2020, 1, 8, 15, 0), None], + ] + + +def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('point(0 1)')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_dataframe(geography_as_object=True) + assert list(df["name"]) == ["bar", "baz", "foo"] + assert df["geog"][0] == wkt.loads("point(0 1)") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + + +def test_to_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + from shapely import wkt + + bigquery_client.query( + f"create table {dataset_id}.geolake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.geolake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.geolake order by name" + ).to_geodataframe() + assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + assert isinstance(df, geopandas.GeoDataFrame) + assert isinstance(df["geog"], geopandas.GeoSeries) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" + assert df.crs.name == "WGS 84" + assert df.geog.crs.srs == "EPSG:4326" + assert df.geog.crs.name == "WGS 84" + + +def test_load_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + import pandas + from shapely import wkt + from google.cloud.bigquery.schema import SchemaField + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + + table_id = 
f"{dataset_id}.lake_from_gp" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo1", "GEOGRAPHY", "NULLABLE"), + SchemaField("geo2", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", None, "POINT(1 1)"], + ["foo", None, None], + ] + + +def test_load_dataframe_w_shapely(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")]) + ) + + table_id = f"{dataset_id}.lake_from_shapes" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] + + bigquery_client.load_table_from_dataframe(df, table_id).result() + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["bar", "POINT(1 1)"], + ["foo", None], + ["foo", None], + ] + + +def test_load_dataframe_w_wkb(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from shapely import wkb + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))]) + ) + + table_id = f"{dataset_id}.lake_from_wkb" + # We create the table first, to inform the interpretation of the wkb data + bigquery_client.query( + f"create table {table_id} (name string, geo GEOGRAPHY)" + ).result() + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 7a67ea6b5..feba65aa5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
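+# The autouse fixture added below disables _add_server_timeout_header during
+# unit tests. A test can opt back in by applying the pytest mark that the
+# fixture checks for in request.keywords; an illustrative sketch (the test
+# name is hypothetical):
+#
+#     @pytest.mark.enable_add_server_timeout_header
+#     def test_sends_server_timeout_header():
+#         ...  # header injection stays active for this test
+#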
+import mock import pytest from .helpers import make_client @@ -35,3 +36,21 @@ def DS_ID(): @pytest.fixture def LOCATION(): yield "us-central" + + +def noop_add_server_timeout_header(headers, kwargs): + if headers: + kwargs["headers"] = headers + return kwargs + + +@pytest.fixture(autouse=True) +def disable_add_server_timeout_header(request): + if "enable_add_server_timeout_header" in request.keywords: + yield + else: + with mock.patch( + "google.cloud.bigquery.client._add_server_timeout_header", + noop_add_server_timeout_header, + ): + yield diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index c792214e7..3642c7229 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -14,36 +14,20 @@ import unittest -import mock from google.api_core import exceptions - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) +from ..helpers import make_connection, make_client as __make_client def _make_client(project="test-project", connection=None): - from google.cloud.bigquery.client import Client - + client = __make_client(project) if connection is None: - connection = _make_connection() + connection = make_connection() - client = Client(project=project, credentials=_make_credentials(), _http=object()) client._connection = connection return client -def _make_connection(*responses): - import google.cloud.bigquery._http - from google.cloud.exceptions import NotFound - - mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - def _make_retriable_exception(): return exceptions.TooManyRequests( "retriable exception", errors=[{"reason": "rateLimitExceeded"}] diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index c3f7854e3..aa8e9c045 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -21,8 +21,9 @@ import mock import pytest +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_retriable_exception from .helpers import _make_job_resource @@ -740,7 +741,7 @@ def test_cancel_defaults(self): response = {"job": resource} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION - connection = job._client._connection = _make_connection(response) + connection = job._client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -769,7 +770,7 @@ def test_cancel_explicit(self): response = {"job": resource} job = self._set_properties_job() client = _make_client(project=other_project) - connection = client._connection = _make_connection(response) + connection = client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -930,7 +931,7 @@ def test_result_default_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( _make_retriable_exception(), begun_job_resource, _make_retriable_exception(), @@ -968,7 +969,7 @@ def test_result_w_retry_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( exceptions.NotFound("not normally retriable"), begun_job_resource, exceptions.NotFound("not normally retriable"), @@ -1008,7 +1009,7 @@ def 
test_result_w_retry_wo_state(self): ) def test_result_explicit_w_state(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) # Use _set_properties() instead of directly modifying _properties so diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index 992efcf6b..d94e5bc88 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestCopyJobConfig(_Base): @@ -333,7 +334,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -396,9 +397,9 @@ def test_begin_w_alternate_client(self): "writeDisposition": WriteDisposition.WRITE_TRUNCATE, } RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -427,7 +428,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) @@ -446,9 +447,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -468,7 +469,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -488,9 +489,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py index 4c9411d0d..8bada51af 100644 --- a/tests/unit/job/test_extract.py +++ 
b/tests/unit/job/test_extract.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestExtractJobConfig(_Base): @@ -265,7 +266,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -318,9 +319,9 @@ def test_begin_w_alternate_client(self): "printHeader": False, } RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -353,7 +354,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client @@ -371,9 +372,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 @@ -395,7 +396,7 @@ def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -416,9 +417,9 @@ def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 70e7860a7..cf2096b8b 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -16,9 +16,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestLoadJob(_Base): @@ -238,7 +239,7 @@ def test_result_invokes_begin(self): begun_resource = self._make_resource() done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, done_resource) + connection = 
make_connection(begun_resource, done_resource) client = _make_client(self.PROJECT) client._connection = connection @@ -421,7 +422,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties["status"] = {"state": "RUNNING"} @@ -436,7 +437,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) path = "/projects/{}/jobs".format(self.PROJECT) @@ -478,7 +479,7 @@ def test_begin_w_autodetect(self): del resource["etag"] del resource["selfLink"] del resource["user_email"] - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True @@ -559,9 +560,9 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") @@ -611,7 +612,7 @@ def test_begin_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -634,7 +635,7 @@ def test_begin_w_job_reference(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -654,9 +655,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -679,7 +680,7 @@ def test_exists_miss_w_job_reference(self): from google.cloud.bigquery import job job_ref = job._JobReference("my-job-id", "other-project", "US") - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -701,7 +702,7 @@ def test_exists_miss_w_job_reference(self): def 
test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -719,9 +720,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -744,7 +745,7 @@ def test_reload_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -769,7 +770,7 @@ def test_cancel_w_bound_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn = _make_connection(RESPONSE) + conn = make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -788,9 +789,9 @@ def test_cancel_w_alternate_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESPONSE) + conn2 = make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -813,7 +814,7 @@ def test_cancel_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection({"job": resource}) + conn = make_connection({"job": resource}) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index d41370520..4c598d797 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -26,9 +26,11 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query + +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestQueryJob(_Base): @@ -943,7 +945,7 @@ def test_result(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection( + conn = make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource ) client = _make_client(self.PROJECT, 
connection=conn) @@ -1005,7 +1007,7 @@ def test_result_with_done_job_calls_get_query_results(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection(query_resource_done, results_page_resource) + conn = make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1052,7 +1054,7 @@ def test_result_with_max_results(self): {"f": [{"v": "ghi"}]}, ], } - connection = _make_connection(query_resource, query_page_resource) + connection = make_connection(query_resource, query_page_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1096,7 +1098,7 @@ def test_result_w_retry(self): "tableId": "dest_table", } - connection = _make_connection( + connection = make_connection( exceptions.NotFound("not normally retriable"), query_resource, exceptions.NotFound("not normally retriable"), @@ -1144,7 +1146,7 @@ def test_result_w_empty_schema(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": []}, } - connection = _make_connection(query_resource, query_resource) + connection = make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1165,7 +1167,7 @@ def test_result_invokes_begins(self): query_resource["jobComplete"] = True done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, incomplete_resource, query_resource, @@ -1196,7 +1198,7 @@ def test_result_w_timeout(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -1245,7 +1247,7 @@ def test_result_w_page_size(self): ], } query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = _make_connection( + conn = make_connection( query_results_resource, query_page_resource, query_page_resource_2 ) client = _make_client(self.PROJECT, connection=conn) @@ -1303,7 +1305,7 @@ def test_result_with_start_index(self): {"f": [{"v": "jkl"}]}, ], } - connection = _make_connection(query_resource, tabledata_resource) + connection = make_connection(query_resource, tabledata_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1428,7 +1430,7 @@ def test__begin_w_timeout(self): PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1462,7 +1464,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = 
QueryJobConfig() @@ -1530,9 +1532,9 @@ def test_begin_w_alternate_client(self): } RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION RESOURCE["configuration"]["dryRun"] = True - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) @@ -1588,7 +1590,7 @@ def test_begin_w_udf(self): {"resourceUri": RESOURCE_URI}, {"inlineCode": INLINE_UDF_CODE}, ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), @@ -1647,7 +1649,7 @@ def test_begin_w_named_query_parameter(self): "parameterValue": {"value": "123"}, } ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1695,7 +1697,7 @@ def test_begin_w_positional_query_parameter(self): config["queryParameters"] = [ {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1774,7 +1776,7 @@ def test_begin_w_table_defs(self): csv_table: CSV_CONFIG_RESOURCE, } want_resource = copy.deepcopy(RESOURCE) - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.table_definitions = {bt_table: bt_config, csv_table: csv_config} @@ -1818,7 +1820,7 @@ def test_dry_run_query(self): del RESOURCE["selfLink"] del RESOURCE["user_email"] RESOURCE["configuration"]["dryRun"] = True - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True @@ -1846,7 +1848,7 @@ def test_dry_run_query(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1862,9 +1864,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1887,7 +1889,7 @@ def test_reload_w_bound_client(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1919,9 +1921,9 @@ def test_reload_w_alternate_client(self): "datasetId": DS_ID, "tableId": DEST_TABLE, } - conn1 = _make_connection() + conn1 = 
make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1945,7 +1947,7 @@ def test_reload_w_timeout(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1975,7 +1977,7 @@ def test_iter(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c3a9d2d1a..8e4fba770 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -22,13 +22,26 @@ from google.cloud import bigquery_storage +try: + import pandas +except (ImportError, AttributeError): # pragma: NO COVER + pandas = None +try: + import shapely +except (ImportError, AttributeError): # pragma: NO COVER + shapely = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_job_resource pandas = pytest.importorskip("pandas") @@ -106,7 +119,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): }, "totalRows": "4", } - connection = _make_connection(get_query_results_resource, job_resource) + connection = make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -187,7 +200,7 @@ def test_to_arrow(): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) @@ -231,7 +244,7 @@ def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) begun_resource = _make_job_resource(job_type="query") job = target_class.from_api_repr(begun_resource, client) @@ -277,7 +290,7 @@ def test_to_arrow_w_tqdm_w_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -333,7 +346,7 @@ def test_to_arrow_w_tqdm_w_pending_status(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", 
"INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -384,7 +397,7 @@ def test_to_arrow_w_tqdm_wo_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -409,37 +422,41 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -def test_to_dataframe(): +def _make_job(schema=(), rows=()): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") query_resource = { "jobComplete": True, "jobReference": begun_resource["jobReference"], - "totalRows": "4", + "totalRows": str(len(rows)), "schema": { "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + dict(name=field[0], type=field[1], mode=field[2]) for field in schema ] }, } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } + tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) - job = target_class.from_api_repr(begun_resource, client) + return target_class.from_api_repr(begun_resource, client) + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), + ( + ("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + ), + ) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -457,7 +474,7 @@ def test_to_dataframe_ddl_query(): "jobReference": resource["jobReference"], "schema": {"fields": []}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) @@ -481,7 +498,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ] }, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -525,7 +542,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): "totalRows": "4", "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -594,7 +611,7 @@ def test_to_dataframe_column_dtypes(): query_resource["rows"] = rows done_resource = copy.deepcopy(begun_resource) 
 done_resource["status"] = {"state": "DONE"}
-    connection = _make_connection(
+    connection = make_connection(
         begun_resource, query_resource, done_resource, query_resource
     )
     client = _make_client(connection=connection)
@@ -633,7 +650,7 @@ def test_to_dataframe_column_date_dtypes():
     query_resource["rows"] = rows
     done_resource = copy.deepcopy(begun_resource)
     done_resource["status"] = {"state": "DONE"}
-    connection = _make_connection(
+    connection = make_connection(
         begun_resource, query_resource, done_resource, query_resource
     )
     client = _make_client(connection=connection)
@@ -661,7 +678,7 @@ def test_to_dataframe_with_progress_bar(tqdm_mock):
     }
     done_resource = copy.deepcopy(begun_resource)
     done_resource["status"] = {"state": "DONE"}
-    connection = _make_connection(
+    connection = make_connection(
         begun_resource, query_resource, done_resource, query_resource, query_resource,
     )
     client = _make_client(connection=connection)
@@ -693,7 +710,7 @@ def test_to_dataframe_w_tqdm_pending():
         {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
     ]

-    connection = _make_connection({})
+    connection = make_connection({})
     client = _make_client(connection=connection)
     job = target_class.from_api_repr(begun_resource, client)

@@ -748,7 +765,7 @@ def test_to_dataframe_w_tqdm():
         {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
     ]

-    connection = _make_connection({})
+    connection = make_connection({})
     client = _make_client(connection=connection)
     job = target_class.from_api_repr(begun_resource, client)

@@ -802,7 +819,7 @@ def test_to_dataframe_w_tqdm_max_results():
     ]
     rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}]

-    connection = _make_connection({})
+    connection = make_connection({})
     client = _make_client(connection=connection)
     job = target_class.from_api_repr(begun_resource, client)

@@ -835,3 +852,94 @@ def test_to_dataframe_w_tqdm_max_results():
     result_patch_tqdm.assert_called_with(
         timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3
     )
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(shapely is None, reason="Requires `shapely`")
+def test_to_dataframe_geography_as_object():
+    job = _make_job(
+        (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")),
+        (
+            ("Phred Phlyntstone", "Point(0 0)"),
+            ("Bharney Rhubble", "Point(0 1)"),
+            ("Wylma Phlyntstone", None),
+        ),
+    )
+    df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True)
+
+    assert isinstance(df, pandas.DataFrame)
+    assert len(df) == 3  # verify the number of rows
+    assert list(df) == ["name", "geog"]  # verify the column names
+    assert [v.__class__.__name__ for v in df.geog] == [
+        "Point",
+        "Point",
+        "float",
+    ]  # float, because the missing value surfaces as NaN
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_to_geodataframe():
+    job = _make_job(
+        (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")),
+        (
+            ("Phred Phlyntstone", "Point(0 0)"),
+            ("Bharney Rhubble", "Point(0 1)"),
+            ("Wylma Phlyntstone", None),
+        ),
+    )
+    df = job.to_geodataframe(create_bqstorage_client=False)
+
+    assert isinstance(df, geopandas.GeoDataFrame)
+    assert len(df) == 3  # verify the number of rows
+    assert list(df) == ["name", "geog"]  # verify the column names
+    assert [v.__class__.__name__ for v in df.geog] == [
+        "Point",
+        "Point",
+        "NoneType",
+    ]  # NoneType, because a GeoSeries keeps missing values as None
+    assert isinstance(df.geog, geopandas.GeoSeries)
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+@mock.patch("google.cloud.bigquery.job.query.wait_for_query")
+def test_query_job_to_geodataframe_delegation(wait_for_query):
+    """
+    QueryJob.to_geodataframe simply delegates to RowIterator.to_geodataframe.
+
+    This test demonstrates only the delegation; the full range of
+    variations is covered by the RowIterator tests.
+    """
+    import numpy
+
+    job = _make_job()
+    bqstorage_client = object()
+    dtypes = dict(xxx=numpy.dtype("int64"))
+    progress_bar_type = "normal"
+    create_bqstorage_client = False
+    date_as_object = False
+    max_results = 42
+    geography_column = "g"
+
+    df = job.to_geodataframe(
+        bqstorage_client=bqstorage_client,
+        dtypes=dtypes,
+        progress_bar_type=progress_bar_type,
+        create_bqstorage_client=create_bqstorage_client,
+        date_as_object=date_as_object,
+        max_results=max_results,
+        geography_column=geography_column,
+    )
+
+    wait_for_query.assert_called_once_with(
+        job, progress_bar_type, max_results=max_results
+    )
+    row_iterator = wait_for_query.return_value
+    row_iterator.to_geodataframe.assert_called_once_with(
+        bqstorage_client=bqstorage_client,
+        dtypes=dtypes,
+        progress_bar_type=progress_bar_type,
+        create_bqstorage_client=create_bqstorage_client,
+        date_as_object=date_as_object,
+        geography_column=geography_column,
+    )
+    assert df is row_iterator.to_geodataframe.return_value
diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py
index cf60cf749..035f04456 100644
--- a/tests/unit/test__helpers.py
+++ b/tests/unit/test__helpers.py
@@ -21,6 +21,12 @@
 class TestBQStorageVersions(unittest.TestCase):
+    def tearDown(self):
+        from google.cloud.bigquery import _helpers
+
+        # Reset the cached version, since it may not match reality.
+        _helpers.BQ_STORAGE_VERSIONS._installed_version = None
+
     def _object_under_test(self):
         from google.cloud.bigquery import _helpers

@@ -52,6 +58,34 @@ def test_is_read_session_optional_false(self):
         assert not versions.is_read_session_optional
+
+class TestPyarrowVersions(unittest.TestCase):
+    def tearDown(self):
+        from google.cloud.bigquery import _helpers
+
+        # Reset the cached version, since it may not match reality.
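+        # (PyarrowVersions.installed_version parses pyarrow.__version__ once
+        # and then serves the cached value, which the tests below rely on.)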
+ _helpers.PYARROW_VERSIONS._installed_version = None + + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.PyarrowVersions() + + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("pyarrow.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 5a792527a..0140beb77 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -32,14 +32,19 @@ import pyarrow import pyarrow.types + +try: + import geopandas +except ImportError: # pragma: NO COVER + geopandas = None + import pytest from google import api_core +from google.cloud import bigquery_storage from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud import bigquery_storage - PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -566,6 +571,57 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_bq_to_arrow_array_w_geography_dtype(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = geopandas.GeoSeries([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = pandas.Series([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == list(series) + + def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( 
schema.SchemaField("field1", "STRING"), @@ -743,6 +799,41 @@ def test_dataframe_to_json_generator(module_under_test): assert list(rows) == expected +def test_dataframe_to_json_generator_repeated_field(module_under_test): + pytest.importorskip( + "pandas", + minversion=str(PANDAS_MINIUM_VERSION), + reason=( + f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` " + "which introduces pandas.NA" + ), + ) + + df_data = [ + collections.OrderedDict( + [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")] + ), + collections.OrderedDict( + [ + ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]), + ("not_repeated_col", "second"), + ] + ), + ] + dataframe = pandas.DataFrame(df_data) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + + expected = [ + {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"}, + { + "repeated_col": ["a", "b", mock.sentinel.foo, "d"], + "not_repeated_col": "second", + }, + ] + assert list(rows) == expected + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( @@ -804,7 +895,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): def test_dataframe_to_bq_schema_dict_sequence(module_under_test): df_data = collections.OrderedDict( [ - ("str_column", [u"hello", u"world"]), + ("str_column", ["hello", "world"]), ("int_column", [42, 8]), ("bool_column", [True, False]), ] @@ -988,7 +1079,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema) @@ -1002,7 +1093,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_extra_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None @@ -1013,7 +1104,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_missing_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( pandas.DataFrame({"not_in_bq": [1, 2, 3]}), (), None @@ -1046,8 +1137,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test): def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ - {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, ] ) @@ -1075,8 +1166,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ - {"struct_field": {"one": 2}, "status": u"FOO"}, - {"struct_field": {"two": u"222"}, "status": u"BAR"}, + 
{"struct_field": {"one": 2}, "status": "FOO"}, + {"struct_field": {"two": "222"}, "status": "BAR"}, ] ) @@ -1095,6 +1186,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): assert "struct_field" in str(expected_warnings[0]) +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_dataframe_to_bq_schema_geography(module_under_test): + from shapely import wkt + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( + schema.SchemaField("name", "STRING"), + schema.SchemaField("geo1", "GEOGRAPHY"), + schema.SchemaField("geo2", "GEOGRAPHY"), + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( @@ -1107,7 +1220,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55), "date_field": datetime.date(2005, 5, 31), "bytes_field": b"some bytes", - "string_field": u"some characters", + "string_field": "some characters", "numeric_field": decimal.Decimal("123.456"), "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } @@ -1166,13 +1279,13 @@ def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ { - "status": u"FOO", + "status": "FOO", "struct_field": {"one": 1}, - "struct_field_2": {"foo": u"123"}, + "struct_field_2": {"foo": "123"}, }, { - "status": u"BAR", - "struct_field": {"two": u"111"}, + "status": "BAR", + "struct_field": {"two": "111"}, "struct_field_2": {"bar": 27}, }, ] @@ -1206,7 +1319,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) write_table_patch = mock.patch.object( @@ -1479,3 +1592,21 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) + + +def test_bq_to_arrow_field_type_override(module_under_test): + # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. 
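+    # The asserts below illustrate the convention: GEOGRAPHY defaults to WKT
+    # text (pyarrow.string()), while passing an explicit pyarrow.binary()
+    # type opts in to the WKB binary representation.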
+ + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + ).type + == pyarrow.binary() + ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 458798afa..4c6ec5b4f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -52,6 +52,7 @@ from google.cloud import bigquery_storage from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection @@ -358,7 +359,7 @@ def test__get_query_results_miss_w_client_location(self): method="GET", path="/projects/PROJECT/queries/nothere", query_params={"maxResults": 0, "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test__get_query_results_hit(self): @@ -419,7 +420,9 @@ def test_get_service_account_email_w_alternate_project(self): service_account_email = client.get_service_account_email(project=project) final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=DEFAULT_TIMEOUT + ) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_custom_retry(self): @@ -687,7 +690,7 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -723,7 +726,7 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -759,11 +762,13 @@ def test_create_routine_w_conflict_exists_ok(self): self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + ), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -839,7 +844,7 @@ def test_create_table_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) @@ -880,7 +885,7 @@ def test_create_table_w_encryption_configuration(self): "labels": {}, "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -916,7 +921,7 @@ def test_create_table_w_day_partition_and_expire(self): "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) @@ -997,7 +1002,7 @@ def test_create_table_w_schema_and_query(self): "view": {"query": query, "useLegacySql": False}, "labels": {}, }, - 
timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1052,7 +1057,7 @@ def test_create_table_w_external(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1091,7 +1096,7 @@ def test_create_table_w_reference(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1125,7 +1130,7 @@ def test_create_table_w_fully_qualified_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1157,7 +1162,7 @@ def test_create_table_w_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1192,7 +1197,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_table_alreadyexists_w_exists_ok_true(self): @@ -1235,9 +1240,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) @@ -1310,7 +1315,7 @@ def test_get_model_w_string(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None + method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT ) self.assertEqual(got.model_id, self.MODEL_ID) @@ -1419,7 +1424,7 @@ def test_get_table_sets_user_agent(self): "User-Agent": expected_user_agent, }, data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIn("my-application/1.2.3", expected_user_agent) @@ -1717,7 +1722,6 @@ def test_update_dataset(self): "access": ACCESS, }, path="/" + PATH, - headers=None, timeout=7.5, ) self.assertEqual(ds2.description, ds.description) @@ -1761,8 +1765,7 @@ def test_update_dataset_w_custom_property(self): method="PATCH", data={"newAlphaProperty": "unreleased property"}, path=path, - headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -1820,7 +1823,7 @@ def test_update_model(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_model.model_id, model.model_id) self.assertEqual(updated_model.description, model.description) @@ -1893,7 +1896,6 @@ def test_update_routine(self): method="PUT", data=sent, path="/projects/routines-project/datasets/test_routines/routines/updated_routine", - headers=None, timeout=7.5, ) self.assertEqual(actual_routine.arguments, routine.arguments) @@ -2001,7 +2003,7 @@ def test_update_table(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -2051,8 +2053,7 @@ def test_update_table_w_custom_property(self): method="PATCH", path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, - headers=None, - timeout=None, + 
timeout=DEFAULT_TIMEOUT, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2086,8 +2087,7 @@ def test_update_table_only_use_legacy_sql(self): method="PATCH", path="/%s" % path, data={"view": {"useLegacySql": True}}, - headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2184,8 +2184,7 @@ def test_update_table_w_query(self): "expirationTime": str(_millis(exp_time)), "schema": schema_resource, }, - headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_update_table_w_schema_None(self): @@ -2314,7 +2313,7 @@ def test_delete_job_metadata_not_found(self): method="DELETE", path="/projects/client-proj/jobs/my-job/delete", query_params={"location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_id(self): @@ -2328,7 +2327,7 @@ def test_delete_job_metadata_with_id(self): method="DELETE", path="/projects/param-proj/jobs/my-job/delete", query_params={"location": "param-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_resource(self): @@ -2353,7 +2352,7 @@ def test_delete_job_metadata_with_resource(self): method="DELETE", path="/projects/job-based-proj/jobs/query_job/delete", query_params={"location": "us-east1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_model(self): @@ -2408,7 +2407,9 @@ def test_delete_model_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_model_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2429,7 +2430,9 @@ def test_delete_model_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_routine(self): from google.cloud.bigquery.routine import Routine @@ -2483,7 +2486,7 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2505,7 +2508,7 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -2569,7 +2572,9 @@ def test_delete_table_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_table_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -2591,7 +2596,9 @@ def test_delete_table_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - 
conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def _create_job_helper(self, job_config): creds = _make_credentials() @@ -2609,7 +2616,7 @@ def _create_job_helper(self, job_config): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_job_load_config(self): @@ -2758,7 +2765,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): method="POST", path="/projects/PROJECT/jobs", data=data_without_destination, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ) @@ -2798,7 +2805,7 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_miss_w_client_location(self): @@ -2816,7 +2823,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_hit_w_timeout(self): @@ -2885,7 +2892,7 @@ def test_cancel_job_miss_w_explict_project(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_miss_w_client_location(self): @@ -2904,7 +2911,7 @@ def test_cancel_job_miss_w_client_location(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_hit(self): @@ -2940,7 +2947,7 @@ def test_cancel_job_hit(self): method="POST", path="/projects/job-based-proj/jobs/query_job/cancel", query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_w_timeout(self): @@ -3066,7 +3073,7 @@ def test_load_table_from_uri_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_client_location(self): @@ -3110,7 +3117,7 @@ def test_load_table_from_uri_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_invalid_job_config(self): @@ -3398,7 +3405,7 @@ def test_copy_table_w_multiple_sources(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=expected_resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -3460,7 +3467,7 @@ def test_copy_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_client_location(self): @@ -3510,7 +3517,7 @@ def test_copy_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_source_strings(self): @@ -3603,7 +3610,7 @@ def test_copy_table_w_valid_job_config(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job._configuration, CopyJobConfig) @@ -3709,7 +3716,7 @@ def 
test_extract_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_w_client_location(self): @@ -3753,7 +3760,7 @@ def test_extract_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_generated_job_id(self): @@ -3796,7 +3803,7 @@ def test_extract_table_generated_job_id(self): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3841,7 +3848,7 @@ def test_extract_table_w_destination_uris(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -4011,7 +4018,7 @@ def test_query_defaults(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4064,7 +4071,7 @@ def test_query_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_explicit_job_config(self): @@ -4120,7 +4127,10 @@ def test_query_w_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4164,7 +4174,10 @@ def test_query_preserving_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4216,7 +4229,10 @@ def test_query_preserving_explicit_default_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original default config object should not have been modified @@ -4301,7 +4317,10 @@ def test_query_w_explicit_job_config_override(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_client_default_config_no_incoming(self): @@ -4342,7 +4361,10 @@ def test_query_w_client_default_config_no_incoming(self): # Check that query actually starts the job. 
conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_invalid_default_job_config(self): @@ -4387,7 +4409,7 @@ def test_query_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_detect_location(self): @@ -4458,7 +4480,7 @@ def test_query_w_udf_resources(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4514,7 +4536,7 @@ def test_query_w_query_parameters(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertEqual(sent["jobReference"]["jobId"], JOB) sent_config = sent["configuration"]["query"] @@ -4706,7 +4728,7 @@ def _row_data(row): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/%s" % PATH) self.assertEqual(req["data"], SENT) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -4774,7 +4796,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_list_of_Rows(self): @@ -4819,7 +4841,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): @@ -4896,7 +4918,7 @@ def _row_data(row): errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] ) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_repeated_fields(self): @@ -4997,7 +5019,7 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None, + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5063,7 +5085,7 @@ def test_insert_rows_w_record_schema(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_explicit_none_insert_ids(self): @@ -5097,7 +5119,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=None, + method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5181,7 +5203,7 @@ def test_insert_rows_w_numeric(self): project, 
ds_id, table_id ), data=sent, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5373,7 +5395,10 @@ def test_insert_rows_from_dataframe_many_columns(self): ] } expected_call = mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) actual_calls = conn.api_request.call_args_list @@ -5426,7 +5451,10 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 assert actual_calls[0] == mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_default_behavior(self): @@ -5506,7 +5534,7 @@ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): @@ -5536,7 +5564,7 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_iterator_row_ids(self): @@ -5563,7 +5591,7 @@ def test_insert_rows_json_with_iterator_row_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_non_iterable_row_ids(self): @@ -5616,7 +5644,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_none_insert_ids_sequence(self): @@ -5655,7 +5683,7 @@ def test_insert_rows_json_w_none_insert_ids_sequence(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_wrong_arg(self): @@ -5850,7 +5878,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -5860,7 +5888,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -6011,7 +6039,7 @@ def test_list_rows_repeated_fields(self): "selectedFields": "color,struct", "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_w_record_schema(self): @@ -6081,7 +6109,7 @@ def test_list_rows_w_record_schema(self): method="GET", path="/%s" % PATH, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_with_missing_schema(self): @@ -6136,7 +6164,7 @@ def test_list_rows_with_missing_schema(self): row_iter = client.list_rows(table) conn.api_request.assert_called_once_with( - method="GET", path=table_path, timeout=None + method="GET", path=table_path, timeout=DEFAULT_TIMEOUT ) conn.api_request.reset_mock() self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) @@ -6146,7 +6174,7 @@ def test_list_rows_with_missing_schema(self): 
method="GET", path=tabledata_path, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) @@ -6319,7 +6347,7 @@ def test_load_table_from_file_resumable(self): file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6352,7 +6380,7 @@ def test_load_table_from_file_w_explicit_project(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6386,7 +6414,7 @@ def test_load_table_from_file_w_client_location(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6448,7 +6476,7 @@ def test_load_table_from_file_resumable_metadata(self): file_obj, expected_config, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6477,7 +6505,7 @@ def test_load_table_from_file_multipart(self): self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.PROJECT, ) @@ -6502,7 +6530,7 @@ def test_load_table_from_file_with_retries(self): file_obj, self.EXPECTED_CONFIGURATION, num_retries, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6539,7 +6567,7 @@ def test_load_table_from_file_with_readable_gzip(self): gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6661,7 +6689,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6718,7 +6746,7 @@ def test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6771,7 +6799,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6826,7 +6854,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6918,7 +6946,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6983,7 +7011,7 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7037,7 +7065,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7083,7 +7111,7 @@ def 
test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf( @@ -7124,7 +7152,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7171,7 +7199,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7232,7 +7260,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7306,7 +7334,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7427,7 +7455,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7493,7 +7521,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7531,7 +7559,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7584,7 +7612,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7644,7 +7672,7 @@ def test_load_table_from_json_unicode_emoji_data_case(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_data_file = load_table_from_file.mock_calls[0][1][1] @@ -7974,3 +8002,20 @@ def transmit_next_chunk(transport): chunk_size = RU.call_args_list[0][0][1] assert chunk_size == 100 * (1 << 20) + + +@pytest.mark.enable_add_server_timeout_header +@pytest.mark.parametrize("headers", [None, {}]) +def test__call_api_add_server_timeout_w_timeout(client, headers): + client._connection = make_connection({}) + client._call_api(None, method="GET", path="/", headers=headers, timeout=42) + client._connection.api_request.assert_called_with( + method="GET", path="/", timeout=42, headers={"X-Server-Timeout": "42"} + ) + + +@pytest.mark.enable_add_server_timeout_header +def test__call_api_no_add_server_timeout_wo_timeout(client): + client._connection = make_connection({}) + client._call_api(None, method="GET", path="/") + client._connection.api_request.assert_called_with(method="GET", path="/") diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index d07aaed4f..67b21225d 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -15,6 +15,7 @@ from google.cloud.bigquery.dataset import Dataset, DatasetReference from 
.helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import mock import pytest @@ -111,7 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "labels": LABELS, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -143,7 +144,7 @@ def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -176,7 +177,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LO "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -211,7 +212,7 @@ def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOC "labels": {}, "location": OTHER_LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -241,7 +242,7 @@ def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -271,7 +272,7 @@ def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -306,7 +307,7 @@ def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -356,8 +357,8 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 4afc47b6c..84c74eeec 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -606,8 +606,8 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameters,parameter_types,expect", [ ( - [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], - ["ARRAY<INT64>", "struct<name STRING, bdate DATE>"], + [[], dict(name="ch1", b_date=datetime.date(2021, 1, 1))], + ["ARRAY<INT64>", "struct<name STRING, b_date DATE>"], [ { "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, @@ -617,13 +617,13 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameterType": { "structTypes": [ {"name": "name", "type": {"type": "STRING"}}, - {"name": "bdate", "type": {"type": "DATE"}}, + {"name": "b_date", "type": {"type": "DATE"}}, ], "type": "STRUCT", }, "parameterValue": { "structValues": { - "bdate": {"value": "2021-01-01"}, + "b_date": {"value": "2021-01-01"}, "name": {"value": "ch1"}, } }, diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index f075bb6f7..07bce986f 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -785,6 +785,10 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), ( "values(%%%%%(foo:struct)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py index 3a65e031c..b48beb147 100644 --- a/tests/unit/test_delete_dataset.py +++ b/tests/unit/test_delete_dataset.py @@ -14,6
+14,7 @@ from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import pytest @@ -40,7 +41,7 @@ def test_delete_dataset_delete_contents( method="DELETE", path="/%s" % PATH, query_params={"deleteContents": "true"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -61,7 +62,7 @@ def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): client.delete_dataset(DS_ID) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -74,5 +75,5 @@ def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): ) client.delete_dataset(DS_ID, not_found_ok=True) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py index 7793a7ba6..6f0b55c5e 100644 --- a/tests/unit/test_list_datasets.py +++ b/tests/unit/test_list_datasets.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -65,7 +66,7 @@ def test_list_datasets_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -120,5 +121,5 @@ def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): "maxResults": 3, "pageToken": TOKEN, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py index f348be724..1fb40d446 100644 --- a/tests/unit/test_list_jobs.py +++ b/tests/unit/test_list_jobs.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -136,7 +137,7 @@ def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): method="GET", path="/%s" % PATH, query_params=dict({"projection": "full"}, **query), - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -185,7 +186,7 @@ def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -220,7 +221,7 @@ def test_list_jobs_explicit_missing(client, PROJECT): "allUsers": True, "stateFilter": "done", }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -233,7 +234,7 @@ def test_list_jobs_w_project(client, PROJECT): method="GET", path="/projects/other-project/jobs", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -269,7 +270,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): "minCreationTime": "1", "maxCreationTime": str(end_time_millis), }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -286,6 +287,6 @@ def test_list_jobs_w_parent_job_filter(client, PROJECT): method="GET", path="/projects/%s/jobs" % PROJECT, query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 4ede9a7dd..b14852338 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) @@ -82,7 +84,7 @@ def test_list_models_defaults( assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py index a88540dd5..190612b44 100644 --- a/tests/unit/test_list_projects.py +++ b/tests/unit/test_list_projects.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -66,7 +67,7 @@ def test_list_projects_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params=query, timeout=None + method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -115,5 +116,5 @@ def test_list_projects_explicit_response_missing_projects_key(client): method="GET", path="/projects", query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 069966542..80e62d6bd 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_routines_empty_w_timeout(client): conn = client._connection = make_connection({}) @@ -85,7 +87,7 @@ def test_list_routines_defaults( assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params=query, timeout=None + method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 45d15bed3..8360f6605 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -1,21 +1,23 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic -import google.cloud.bigquery.dataset import pytest +import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + @dataset_polymorphic def test_list_tables_empty_w_timeout( @@ -89,7 +91,7 @@ def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -150,7 +152,7 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index bb3a8d1fd..01c213e98 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -32,6 +32,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.magics import magics +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -172,7 +173,7 @@ def test_context_with_default_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -236,7 +237,7 @@ def test_context_with_custom_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -593,7 +594,9 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -635,7 +638,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type="tqdm" + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -689,7 +694,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) - query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.return_value.to_dataframe.assert_called_once_with( + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=mock.ANY, + ) @pytest.mark.usefixtures("ipython_interactive") @@ -858,7 +868,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, ) @@ -1175,7 +1185,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, 
progress_bar_type=magics.context.progress_bar_type + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=magics.context.progress_bar_type, ) assert isinstance(return_value, pandas.DataFrame) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index c7c25e036..e0a992f78 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -55,6 +55,18 @@ def test_w_unstructured_requests_chunked_encoding_error(self): exc = requests.exceptions.ChunkedEncodingError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_connecttimeout(self): + exc = requests.exceptions.ConnectTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_readtimeout(self): + exc = requests.exceptions.ReadTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_timeout(self): + exc = requests.exceptions.Timeout() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 0ff2c9258..ed9ed5d0f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -14,6 +14,7 @@ import datetime import logging +import re import time import types import unittest @@ -36,6 +37,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None + try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -1826,6 +1832,27 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. self.assertEqual(len(df.columns), 0) + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_if_geopandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one() + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + class TestRowIterator(unittest.TestCase): def _class_under_test(self): @@ -1863,6 +1890,16 @@ def _make_one( client, api_request, path, schema, table=table, **kwargs ) + def _make_one_from_data(self, schema=(), rows=()): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField(*a) for a in schema] + rows = [{"f": [{"v": v} for v in row]} for row in rows] + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + return self._make_one(_mock_client(), api_request, path, schema) + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start @@ -3076,6 +3113,18 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.shapely", new=None) + def test_to_dataframe_error_if_shapely_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." + ), + ): + self._make_one_from_data().to_dataframe(geography_as_object=True) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3790,6 +3839,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_dataframe_geography_as_object(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_dataframe( + create_bqstorage_client=False, geography_as_object=True, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "object") + self.assertIsInstance(df.geog, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"] + ) + + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_error_if_geopandas_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + self._make_one_from_data().to_geodataframe() + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + self.assertEqual(df.geog.crs.srs, "EPSG:4326") + self.assertEqual(df.geog.crs.name, "WGS 84") + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_ambiguous_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_bad_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "The given geography column, xxx, doesn't name" + " a GEOGRAPHY column in the result." 
+ ), + ): + row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="xxx" + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_no_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "STRING")), () + ) + with self.assertRaisesRegex( + TypeError, + re.escape( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_w_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)", "Point(1 1)"), + ("bar", None, "Point(2 2)"), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), + ), + ) + df = row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="geog" + ) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog", "geog2"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertEqual(df.geog2.dtype.name, "object") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] + ) + + # Geog2 isn't a GeoSeries, but it contains geometries: + self.assertIsInstance(df.geog2, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] + ) + # and can easily be converted to a GeoSeries + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") + def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): + """ + RowIterator.to_geodataframe just delegates to RowIterator.to_dataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for to_dataframe.
+ """ + import numpy + from shapely import wkt + + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("g", "GEOGRAPHY")) + ) + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + geography_column = "g" + + to_dataframe.return_value = pandas.DataFrame( + dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + ) + + df = row_iterator.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + + to_dataframe.assert_called_once_with( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(list(df), ["name", "g"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.g.dtype.name, "geometry") + self.assertIsInstance(df.g, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self):