Skip to content

Commit

Permalink
fix: raise error if inserting rows with unknown fields (#163)
Browse files Browse the repository at this point in the history
Co-authored-by: Tres Seaver <tseaver@palladion.com>
  • Loading branch information
plamut and tseaver committed Jul 30, 2020
1 parent f59fc9a commit 8fe7254
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 1 deletion.
31 changes: 30 additions & 1 deletion google/cloud/bigquery/_helpers.py
Expand Up @@ -19,6 +19,7 @@
import datetime
import decimal
import re
import six

from google.cloud._helpers import UTC
from google.cloud._helpers import _date_from_iso8601_date
Expand Down Expand Up @@ -419,9 +420,23 @@ def _record_field_to_json(fields, row_value):
Returns:
Mapping[str, Any]: A JSON-serializable dictionary.
"""
record = {}
isdict = isinstance(row_value, dict)

# If row is passed as a tuple, make the length sanity check to avoid either
# uninformative index errors a few lines below or silently omitting some of
# the values from the result (we cannot know exactly which fields are missing
# or redundant, since we don't have their names).
if not isdict and len(row_value) != len(fields):
msg = "The number of row fields ({}) does not match schema length ({}).".format(
len(row_value), len(fields)
)
raise ValueError(msg)

record = {}

if isdict:
processed_fields = set()

for subindex, subfield in enumerate(fields):
subname = subfield.name
subvalue = row_value.get(subname) if isdict else row_value[subindex]
Expand All @@ -430,6 +445,20 @@ def _record_field_to_json(fields, row_value):
if subvalue is not None:
record[subname] = _field_to_json(subfield, subvalue)

if isdict:
processed_fields.add(subname)

# Unknown fields should not be silently dropped, include them. Since there
# is no schema information available for them, include them as strings
# to make them JSON-serializable.
if isdict:
not_processed = set(row_value.keys()) - processed_fields

for field_name in not_processed:
value = row_value[field_name]
if value is not None:
record[field_name] = six.text_type(value)

return record


Expand Down
40 changes: 40 additions & 0 deletions tests/unit/test__helpers.py
Expand Up @@ -18,6 +18,7 @@
import unittest

import mock
import six


class Test_not_null(unittest.TestCase):
Expand Down Expand Up @@ -847,6 +848,26 @@ def test_w_non_empty_list(self):
converted = self._call_fut(fields, original)
self.assertEqual(converted, {"one": "42", "two": "two"})

def test_w_list_missing_fields(self):
    """A sequence row with fewer values than schema fields must raise."""
    schema = [
        _make_field("INT64", name="one", mode="NULLABLE"),
        _make_field("STRING", name="two", mode="NULLABLE"),
    ]
    # Only one value supplied for a two-field schema.
    row = [42]

    with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"):
        self._call_fut(schema, row)

def test_w_list_too_many_fields(self):
    """A sequence row with more values than schema fields must raise."""
    schema = [
        _make_field("INT64", name="one", mode="NULLABLE"),
        _make_field("STRING", name="two", mode="NULLABLE"),
    ]
    # Three values supplied for a two-field schema.
    row = [42, "two", "three"]

    with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"):
        self._call_fut(schema, row)

def test_w_non_empty_dict(self):
fields = [
_make_field("INT64", name="one", mode="NULLABLE"),
Expand Down Expand Up @@ -890,6 +911,25 @@ def test_w_explicit_none_value(self):
# None values should be dropped regardless of the field type
self.assertEqual(converted, {"one": "42"})

def test_w_dict_unknown_fields(self):
    """Dict keys missing from the schema are kept as strings; None is dropped."""
    schema = [
        _make_field("INT64", name="one", mode="NULLABLE"),
        _make_field("STRING", name="two", mode="NULLABLE"),
    ]
    row = {
        "whoami": datetime.date(2020, 7, 20),
        "one": 111,
        "two": "222",
        "void": None,
    }

    result = self._call_fut(schema, row)

    # Unknown fields should be included (if not None), but converted as strings.
    expected = {"whoami": "2020-07-20", "one": "111", "two": "222"}
    self.assertEqual(result, expected)


class Test_field_to_json(unittest.TestCase):
def _call_fut(self, field, value):
Expand Down

0 comments on commit 8fe7254

Please sign in to comment.