Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bigquery): expose date_as_object parameter to users #150

Merged
merged 6 commits into from Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions google/cloud/bigquery/job.py
Expand Up @@ -3320,6 +3320,7 @@ def to_dataframe(
dtypes=None,
progress_bar_type=None,
create_bqstorage_client=True,
date_as_object=True,
):
"""Return a pandas DataFrame from a QueryJob

Expand Down Expand Up @@ -3360,6 +3361,12 @@ def to_dataframe(

.. versionadded:: 1.24.0

date_as_object (bool):
plamut marked this conversation as resolved.
Show resolved Hide resolved
Optional. If ``True`` (default), cast dates to objects.
If ``False``, convert to ``datetime64[ns]`` dtype.
plamut marked this conversation as resolved.
Show resolved Hide resolved

.. versionadded:: 1.26.0

Returns:
A :class:`~pandas.DataFrame` populated with row data and column
headers from the query results. The column headers are derived
Expand All @@ -3373,6 +3380,7 @@ def to_dataframe(
dtypes=dtypes,
progress_bar_type=progress_bar_type,
create_bqstorage_client=create_bqstorage_client,
date_as_object=date_as_object,
)

def __iter__(self):
Expand Down
10 changes: 9 additions & 1 deletion google/cloud/bigquery/table.py
Expand Up @@ -1633,6 +1633,7 @@ def to_dataframe(
dtypes=None,
progress_bar_type=None,
create_bqstorage_client=True,
date_as_object=True,
):
"""Create a pandas DataFrame by loading all pages of a query.

Expand Down Expand Up @@ -1683,6 +1684,12 @@ def to_dataframe(

.. versionadded:: 1.24.0

date_as_object (bool):
Optional. If ``True`` (default), cast dates to objects.
If ``False``, convert to ``datetime64[ns]`` dtype.

.. versionadded:: 1.26.0

Returns:
pandas.DataFrame:
A :class:`~pandas.DataFrame` populated with row data and column
Expand Down Expand Up @@ -1722,7 +1729,7 @@ def to_dataframe(
bqstorage_client=bqstorage_client,
create_bqstorage_client=create_bqstorage_client,
)
df = record_batch.to_pandas()
df = record_batch.to_pandas(date_as_object=date_as_object)
for column in dtypes:
df[column] = pandas.Series(df[column], dtype=dtypes[column])
return df
Expand Down Expand Up @@ -1799,6 +1806,7 @@ def to_dataframe(
dtypes=None,
progress_bar_type=None,
create_bqstorage_client=True,
date_as_object=True,
plamut marked this conversation as resolved.
Show resolved Hide resolved
):
"""Create an empty dataframe.

Expand Down
41 changes: 40 additions & 1 deletion tests/unit/test_job.py
Expand Up @@ -5504,7 +5504,15 @@ def test_to_dataframe_column_dtypes(self):
},
}
row_data = [
["1.4338368E9", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"],
[
"1.4338368E9",
"420",
"1.1",
"1.77",
"Cash",
"true",
"1999-12-01",
],
["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"],
["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"],
]
Expand Down Expand Up @@ -5533,6 +5541,37 @@ def test_to_dataframe_column_dtypes(self):
self.assertEqual(df.complete.dtype.name, "bool")
self.assertEqual(df.date.dtype.name, "object")

@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_column_date_dtypes(self):
    """Check that ``date_as_object=False`` yields a ``datetime64[ns]`` column.

    A single-row query result holding one DATE column is fed through the
    connection built by ``_make_connection``; the job's ``to_dataframe``
    output is then inspected for row count, column names and dtype.
    """
    started_resource = self._make_resource()

    schema_fields = [{"name": "date", "type": "DATE"}]
    date_rows = [["1999-12-01"]]
    query_resource = {
        "jobComplete": True,
        "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
        "totalRows": "1",
        "schema": {"fields": schema_fields},
        # Every cell is wrapped as {"v": ...} and every row as {"f": [...]}.
        "rows": [
            {"f": [{"v": cell} for cell in cells]} for cells in date_rows
        ],
    }

    finished_resource = copy.deepcopy(started_resource)
    finished_resource["status"] = {"state": "DONE"}

    # Responses are handed to the connection in this exact order.
    responses = (
        started_resource,
        query_resource,
        finished_resource,
        query_resource,
    )
    connection = _make_connection(*responses)
    client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, self.QUERY, client)

    df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False)

    self.assertIsInstance(df, pandas.DataFrame)

    # One input row must produce exactly one dataframe row.
    self.assertEqual(len(df), 1)

    # Column labels must mirror the schema field names, in order.
    expected_names = [schema_field["name"] for schema_field in schema_fields]
    self.assertEqual(list(df), expected_names)

    # The DATE column must not be left as generic Python objects.
    self.assertEqual(df.date.dtype.name, "datetime64[ns]")
plamut marked this conversation as resolved.
Show resolved Hide resolved

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(tqdm is None, "Requires `tqdm`")
@mock.patch("tqdm.tqdm")
Expand Down