googleapis · tseaver · Jul 30, 2020 · Jun 30, 2020 · Jul 20, 2020 · Jul 20, 2020
@@ -419,9 +419,23 @@ def _record_field_to_json(fields, row_value):
     Returns:
         Mapping[str, Any]: A JSON-serializable dictionary.
     """
-    record = {}
     isdict = isinstance(row_value, dict)
 
+    # If the row is not a dict (e.g. it is passed as a tuple), sanity-check its
+    # length up front. This avoids both an uninformative IndexError a few lines
+    # below and silently omitting some of the values from the result (we cannot
+    # know which fields are missing or redundant, since we don't have their names).
+    if not isdict and len(row_value) != len(fields):
+        msg = "The number of row fields ({}) does not match schema length ({}).".format(
+            len(row_value), len(fields)
+        )
+        raise ValueError(msg)
+
+    record = {}
+
+    if isdict:
+        processed_fields = set()
+
     for subindex, subfield in enumerate(fields):
         subname = subfield.name
         subvalue = row_value.get(subname) if isdict else row_value[subindex]
@@ -430,6 +444,19 @@ def _record_field_to_json(fields, row_value):
         if subvalue is not None:
             record[subname] = _field_to_json(subfield, subvalue)
 
+        if isdict:
+            processed_fields.add(subname)
+
+    # Unknown fields should not be silently dropped.
+    if isdict:
+        not_processed = set(row_value.keys()) - processed_fields
+        if not_processed:
+            raise ValueError(
+                "Unknown field(s) not present in schema: {}".format(
+                    ", ".join(not_processed)
+                )
+            )
+
     return record
 
 

@@ -18,6 +18,7 @@
 import unittest
 
 import mock
+import six
 
 
 class Test_not_null(unittest.TestCase):
@@ -847,6 +848,26 @@ def test_w_non_empty_list(self):
         converted = self._call_fut(fields, original)
         self.assertEqual(converted, {"one": "42", "two": "two"})
 
+    def test_w_list_missing_fields(self):
+        fields = [
+            _make_field("INT64", name="one", mode="NULLABLE"),
+            _make_field("STRING", name="two", mode="NULLABLE"),
+        ]
+        original = [42]
+
+        with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"):
+            self._call_fut(fields, original)
+
+    def test_w_list_too_many_fields(self):
+        fields = [
+            _make_field("INT64", name="one", mode="NULLABLE"),
+            _make_field("STRING", name="two", mode="NULLABLE"),
+        ]
+        original = [42, "two", "three"]
+
+        with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"):
+            self._call_fut(fields, original)
+
     def test_w_non_empty_dict(self):
         fields = [
             _make_field("INT64", name="one", mode="NULLABLE"),
@@ -890,6 +911,16 @@ def test_w_explicit_none_value(self):
         # None values should be dropped regardless of the field type
         self.assertEqual(converted, {"one": "42"})
 
+    def test_w_dict_unknown_fields(self):
+        fields = [
+            _make_field("INT64", name="one", mode="NULLABLE"),
+            _make_field("STRING", name="two", mode="NULLABLE"),
+        ]
+        original = {"whoami": "_?_?_", "one": 111, "two": "222"}
+
+        with six.assertRaisesRegex(self, ValueError, r".*[Uu]nknown field.*whoami.*"):
+            self._call_fut(fields, original)
+
 
 class Test_field_to_json(unittest.TestCase):
     def _call_fut(self, field, value):