-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BUG: fix AttributeError with BQ Storage API to download empty results (…
…#310) * BUG: fix AttributeError with BQ Storage API to download empty results Refactors timestamp helpers to their own file to help reduce the size of the gbq module. * blacken * fix lint * fix test_zero_rows * update release date
- Loading branch information
Showing
7 changed files
with
275 additions
and
135 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
"""Helpers for working with TIMESTAMP data type. | ||
Private module. | ||
""" | ||
|
||
|
||
def localize_df(df, schema_fields):
    """Make naive TIMESTAMP columns timezone-aware (UTC).

    In pandas versions before 0.24.0, DatetimeTZDtype cannot be used as the
    dtype in Series/DataFrame construction, so the columns are localized
    after the DataFrame has been built.

    Parameters
    ----------
    df: pandas.DataFrame
        DataFrame in which to localize TIMESTAMP columns. It is modified in
        place.
    schema_fields: sequence of dict
        BigQuery schema in parsed JSON data format. Each entry must have
        ``"name"`` and ``"type"`` keys; ``"mode"`` is optional.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with naive TIMESTAMP columns localized to
        UTC. Columns whose mode is REPEATED and columns that already carry
        a timezone are left untouched.
    """
    # An empty frame has nothing to localize; returning early avoids touching
    # the ``.dt`` accessor at all.
    # Fix for https://github.com/pydata/pandas-gbq/issues/299
    if not len(df.index):
        return df

    for schema_field in schema_fields:
        name = str(schema_field["name"])

        # REPEATED fields are skipped before their type is even inspected.
        repeated = (
            "mode" in schema_field
            and schema_field["mode"].upper() == "REPEATED"
        )
        if repeated:
            continue

        if schema_field["type"].upper() != "TIMESTAMP":
            continue

        series = df[name]
        if series.dt.tz is None:
            df[name] = series.dt.tz_localize("UTC")

    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import google.oauth2.service_account | ||
import pytest | ||
|
||
|
||
@pytest.fixture(params=["env"])
def project(request, project_id):
    """Return the project to run against for the current fixture parameter.

    The ``"env"`` parameter maps to the ``project_id`` fixture; any other
    parameter (including ``"none"``) maps to ``None``.
    """
    return project_id if request.param == "env" else None
|
||
|
||
@pytest.fixture()
def credentials(private_key_path):
    """Build service-account credentials from the key file at ``private_key_path``."""
    credentials_cls = google.oauth2.service_account.Credentials
    return credentials_cls.from_service_account_file(private_key_path)
|
||
|
||
@pytest.fixture()
def gbq_connector(project, credentials):
    """Return a ``gbq.GbqConnector`` for ``project`` using ``credentials``."""
    # Imported inside the fixture, as in the original.
    from pandas_gbq import gbq

    connector = gbq.GbqConnector(project, credentials=credentials)
    return connector
|
||
|
||
@pytest.fixture()
def random_dataset(bigquery_client, random_dataset_id):
    """Create a dataset named ``random_dataset_id`` and return the Dataset object."""
    from google.cloud import bigquery

    dataset = bigquery.Dataset(bigquery_client.dataset(random_dataset_id))
    bigquery_client.create_dataset(dataset)
    return dataset
|
||
|
||
@pytest.fixture()
def tokyo_dataset(bigquery_client, random_dataset_id):
    """Create a dataset located in ``asia-northeast1`` and return its ID.

    Note that the dataset ID (``random_dataset_id``) is returned, not the
    Dataset object.
    """
    from google.cloud import bigquery

    dataset = bigquery.Dataset(bigquery_client.dataset(random_dataset_id))
    dataset.location = "asia-northeast1"
    bigquery_client.create_dataset(dataset)
    return random_dataset_id
|
||
|
||
@pytest.fixture()
def tokyo_table(bigquery_client, tokyo_dataset):
    """Create ``tokyo_table`` inside ``tokyo_dataset`` and return the table ID.

    The table is created by a DDL query run in ``asia-northeast1``; the
    fixture waits for the query job to finish before returning.
    """
    table_id = "tokyo_table"
    # Create a random table using DDL.
    # https://github.com/GoogleCloudPlatform/golang-samples/blob/2ab2c6b79a1ea3d71d8f91609b57a8fbde07ae5d/bigquery/snippets/snippet.go#L739
    ddl = """CREATE TABLE {}.{}
AS SELECT
2000 + CAST(18 * RAND() as INT64) as year,
IF(RAND() > 0.5,"foo","bar") as token
FROM UNNEST(GENERATE_ARRAY(0,5,1)) as r
""".format(
        tokyo_dataset, table_id
    )
    query_job = bigquery_client.query(ddl, location="asia-northeast1")
    # Block until the table actually exists.
    query_job.result()
    return table_id
|
||
|
||
@pytest.fixture()
def gbq_dataset(project, credentials):
    """Return a ``gbq._Dataset`` helper for ``project`` using ``credentials``."""
    # Imported inside the fixture, as in the original.
    from pandas_gbq import gbq

    dataset_helper = gbq._Dataset(project, credentials=credentials)
    return dataset_helper
|
||
|
||
@pytest.fixture()
def gbq_table(project, credentials, random_dataset_id):
    """Return a ``gbq._Table`` helper bound to ``project`` and ``random_dataset_id``."""
    # Imported inside the fixture, as in the original.
    from pandas_gbq import gbq

    table_helper = gbq._Table(
        project, random_dataset_id, credentials=credentials
    )
    return table_helper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.