Skip to content

Commit

Permalink
fix: raise error if inserting rows with unknown fields
Browse files Browse the repository at this point in the history
  • Loading branch information
plamut committed Jul 11, 2020
1 parent dbaf3bd commit f579d17
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 1 deletion.
29 changes: 28 additions & 1 deletion google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,23 @@ def _record_field_to_json(fields, row_value):
Returns:
Mapping[str, Any]: A JSON-serializable dictionary.
"""
record = {}
isdict = isinstance(row_value, dict)

# If row is passed as a tuple, make the length sanity check to avoid either
# uninformative index errors a few lines below or silently omitting some of
# the values from the result (we cannot know exactly which fields are missing
# or redundant, since we don't have their names).
if not isdict and len(row_value) != len(fields):
msg = "The number of row fields ({}) does not match schema length ({}).".format(
len(row_value), len(fields)
)
raise ValueError(msg)

record = {}

if isdict:
processed_fields = set()

for subindex, subfield in enumerate(fields):
subname = subfield.name
subvalue = row_value.get(subname) if isdict else row_value[subindex]
Expand All @@ -430,6 +444,19 @@ def _record_field_to_json(fields, row_value):
if subvalue is not None:
record[subname] = _field_to_json(subfield, subvalue)

if isdict:
processed_fields.add(subname)

# Unknown fields should not be silently dropped.
if isdict:
not_processed = set(row_value.keys()) - processed_fields
if not_processed:
raise ValueError(
"Unknown field(s) not present in schema: {}".format(
", ".join(not_processed)
)
)

return record


Expand Down
31 changes: 31 additions & 0 deletions tests/unit/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import unittest

import mock
import six


class Test_not_null(unittest.TestCase):
Expand Down Expand Up @@ -847,6 +848,26 @@ def test_w_non_empty_list(self):
converted = self._call_fut(fields, original)
self.assertEqual(converted, {"one": "42", "two": "two"})

def test_w_list_missing_fields(self):
    # Two-field schema, but the positional row supplies only one value.
    schema = [
        _make_field("INT64", name="one", mode="NULLABLE"),
        _make_field("STRING", name="two", mode="NULLABLE"),
    ]
    short_row = [42]
    expected_message = r".*not match schema length.*"

    # The length mismatch must surface as a ValueError rather than an
    # index error or a silently truncated record.
    with six.assertRaisesRegex(self, ValueError, expected_message):
        self._call_fut(schema, short_row)

def test_w_list_too_many_fields(self):
    # Two-field schema, but the positional row carries a third, surplus value.
    schema = [
        _make_field("INT64", name="one", mode="NULLABLE"),
        _make_field("STRING", name="two", mode="NULLABLE"),
    ]
    long_row = [42, "two", "three"]
    expected_message = r".*not match schema length.*"

    # Extra positional values must be rejected instead of being dropped.
    with six.assertRaisesRegex(self, ValueError, expected_message):
        self._call_fut(schema, long_row)

def test_w_non_empty_dict(self):
fields = [
_make_field("INT64", name="one", mode="NULLABLE"),
Expand Down Expand Up @@ -890,6 +911,16 @@ def test_w_explicit_none_value(self):
# None values should be dropped regardless of the field type
self.assertEqual(converted, {"one": "42"})

def test_w_dict_unknown_fields(self):
    # The mapping row has a key ("whoami") that the schema does not declare.
    schema = [
        _make_field("INT64", name="one", mode="NULLABLE"),
        _make_field("STRING", name="two", mode="NULLABLE"),
    ]
    row_with_extra_key = {"whoami": "_?_?_", "one": 111, "two": "222"}
    expected_message = r".*[Uu]nknown field.*whoami.*"

    # The error message is expected to name the offending key.
    with six.assertRaisesRegex(self, ValueError, expected_message):
        self._call_fut(schema, row_with_extra_key)


class Test_field_to_json(unittest.TestCase):
def _call_fut(self, field, value):
Expand Down

0 comments on commit f579d17

Please sign in to comment.