Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: preserve timestamp microsecond precision with rows from REST API #402

Merged
merged 7 commits into from Dec 4, 2020
13 changes: 12 additions & 1 deletion google/cloud/bigquery/_helpers.py
Expand Up @@ -82,7 +82,18 @@ def _timestamp_from_json(value, field):
"""Coerce 'value' to a datetime, if set or not nullable."""
if _not_null(value, field):
# value will be a float in seconds, to microsecond precision, in UTC.
return _datetime_from_microseconds(1e6 * float(value))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will need to be modified to parse the value from an RFC timestamp string.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I didn't get the point. Before passing `params["formatOptions.useInt64Timestamp"] = True` in `client.list_rows`, the `_timestamp_from_json` method received a string value containing a decimal point; after passing it, the method receives a string value without a decimal point and calls `_datetime_from_microseconds(int(value))`, which returns a datetime object.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see. Yes, that's expected.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, we should update the comment.

Also, are there cases where floating point values are still passed in? If not, we should clean this up now. If so, let's add a TODO to identify those cases and file an issue to clean them up.


if value is not None:
    # Normalize `value` to microseconds since the Unix epoch.
    if isinstance(value, int):
        # Already integer microseconds (formatOptions.useInt64Timestamp path).
        pass
    elif isinstance(value, float):
        # Legacy float of seconds; scale to microseconds.
        value = 1e6 * value
    else:
        # String payload. An int64 microsecond count — including negative,
        # pre-epoch values, which str.isdigit() would miss — parses as int;
        # anything else is the legacy float-of-seconds form (with a decimal
        # point) and must be scaled to microseconds.
        try:
            value = int(value)
        except ValueError:
            value = 1e6 * float(value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this case needed? Couldn't the `isdigit()` check above be removed so that we always fall back to converting to an integer? I don't expect floating-point values to be serialized as strings in JSON.


# At this point `value` holds microseconds since the Unix epoch
# (int or float, depending on the branch taken above).
return _datetime_from_microseconds(value)


def _timestamp_query_param_from_json(value, field):
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/bigquery/client.py
Expand Up @@ -3157,6 +3157,7 @@ def list_rows(
if start_index is not None:
params["startIndex"] = start_index

params["formatOptions.useInt64Timestamp"] = True
row_iterator = RowIterator(
client=self,
api_request=functools.partial(self._call_api, retry, timeout=timeout),
Expand Down Expand Up @@ -3237,6 +3238,7 @@ def _list_rows_from_query_results(
if start_index is not None:
params["startIndex"] = start_index

params["formatOptions.useInt64Timestamp"] = True
row_iterator = RowIterator(
client=self,
api_request=functools.partial(self._call_api, retry, timeout=timeout),
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/job/test_query.py
Expand Up @@ -839,6 +839,7 @@ def test_result(self):
query_params={
"fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
"location": "EU",
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
)
Expand Down Expand Up @@ -887,6 +888,7 @@ def test_result_with_done_job_calls_get_query_results(self):
query_params={
"fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
"location": "EU",
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
)
Expand Down Expand Up @@ -1118,6 +1120,7 @@ def test_result_w_page_size(self):
"maxResults": 3,
"fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
"location": "US",
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
)
Expand All @@ -1129,6 +1132,7 @@ def test_result_w_page_size(self):
"maxResults": 3,
"fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
"location": "US",
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
)
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/test__helpers.py
Expand Up @@ -206,6 +206,22 @@ def test_w_float_value(self):
coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
)

def test_w_string_int_value(self):
    """A digits-only string is parsed as integer microseconds since the epoch."""
    from google.cloud._helpers import _EPOCH

    expected = _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
    self.assertEqual(self._call_fut("1234567", object()), expected)

def test_w_int_value(self):
    """A plain int is treated as integer microseconds since the epoch."""
    from google.cloud._helpers import _EPOCH

    expected = _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
    self.assertEqual(self._call_fut(1234567, object()), expected)


class Test_timestamp_query_param_from_json(unittest.TestCase):
def _call_fut(self, value, field):
Expand Down
37 changes: 30 additions & 7 deletions tests/unit/test_client.py
Expand Up @@ -6807,7 +6807,10 @@ def _bigquery_timestamp_float_repr(ts_float):
self.assertEqual(iterator.next_page_token, TOKEN)

conn.api_request.assert_called_once_with(
method="GET", path="/%s" % PATH, query_params={}, timeout=7.5
method="GET",
path="/%s" % PATH,
query_params={"formatOptions.useInt64Timestamp": True},
timeout=7.5,
)

def test_list_rows_w_start_index_w_page_size(self):
Expand Down Expand Up @@ -6856,20 +6859,30 @@ def test_list_rows_w_start_index_w_page_size(self):
self.assertEqual(len(rows), 2)
self.assertEqual(rows[0], Row(("Wylma Phlyntstone",), f2i))
self.assertEqual(rows[1], Row(("Bhettye Rhubble",), f2i))
self.assertEqual(extra_params, {"startIndex": 1})
self.assertEqual(
extra_params, {"startIndex": 1, "formatOptions.useInt64Timestamp": True}
)

conn.api_request.assert_has_calls(
[
mock.call(
method="GET",
path="/%s" % PATH,
query_params={"startIndex": 1, "maxResults": 2},
query_params={
"startIndex": 1,
"maxResults": 2,
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
),
mock.call(
method="GET",
path="/%s" % PATH,
query_params={"pageToken": "some-page-token", "maxResults": 2},
query_params={
"pageToken": "some-page-token",
"maxResults": 2,
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
),
]
Expand Down Expand Up @@ -6920,6 +6933,7 @@ def test_list_rows_query_params(self):
iterator = client.list_rows(table, **test[0])
six.next(iterator.pages)
req = conn.api_request.call_args_list[i]
test[1]["formatOptions.useInt64Timestamp"] = True
self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0])

def test_list_rows_repeated_fields(self):
Expand Down Expand Up @@ -6979,7 +6993,10 @@ def test_list_rows_repeated_fields(self):
conn.api_request.assert_called_once_with(
method="GET",
path="/%s" % PATH,
query_params={"selectedFields": "color,struct"},
query_params={
"selectedFields": "color,struct",
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
)

Expand Down Expand Up @@ -7047,7 +7064,10 @@ def test_list_rows_w_record_schema(self):
self.assertEqual(page_token, TOKEN)

conn.api_request.assert_called_once_with(
method="GET", path="/%s" % PATH, query_params={}, timeout=None
method="GET",
path="/%s" % PATH,
query_params={"formatOptions.useInt64Timestamp": True},
timeout=None,
)

def test_list_rows_with_missing_schema(self):
Expand Down Expand Up @@ -7109,7 +7129,10 @@ def test_list_rows_with_missing_schema(self):

rows = list(row_iter)
conn.api_request.assert_called_once_with(
method="GET", path=tabledata_path, query_params={}, timeout=None
method="GET",
path=tabledata_path,
query_params={"formatOptions.useInt64Timestamp": True},
timeout=None,
)
self.assertEqual(row_iter.total_rows, 3, msg=repr(table))
self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table))
Expand Down