From f579d1783061ba1d8d79cb701cfb7ff7ae91b78a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 30 Jun 2020 16:29:02 +0200 Subject: [PATCH 1/3] fix: raise error if inserting rows with unknown fields --- google/cloud/bigquery/_helpers.py | 29 ++++++++++++++++++++++++++++- tests/unit/test__helpers.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index d814eec8c..2310224ae 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -419,9 +419,23 @@ def _record_field_to_json(fields, row_value): Returns: Mapping[str, Any]: A JSON-serializable dictionary. """ - record = {} isdict = isinstance(row_value, dict) + # If row is passed as a tuple, make the length sanity check to avoid either + # uninformative index errors a few lines below or silently omitting some of + # the values from the result (we cannot know exactly which fields are missing + # or redundant, since we don't have their names). + if not isdict and len(row_value) != len(fields): + msg = "The number of row fields ({}) does not match schema length ({}).".format( + len(row_value), len(fields) + ) + raise ValueError(msg) + + record = {} + + if isdict: + processed_fields = set() + for subindex, subfield in enumerate(fields): subname = subfield.name subvalue = row_value.get(subname) if isdict else row_value[subindex] @@ -430,6 +444,19 @@ def _record_field_to_json(fields, row_value): if subvalue is not None: record[subname] = _field_to_json(subfield, subvalue) + if isdict: + processed_fields.add(subname) + + # Unknown fields should not be silently dropped. + if isdict: + not_processed = set(row_value.keys()) - processed_fields + if not_processed: + raise ValueError( + "Unknown field(s) not present in schema: {}".format( + ", ".join(not_processed) + ) + ) + return record diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index fa6d27c98..4dd4a8520 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -18,6 +18,7 @@ import unittest import mock +import six class Test_not_null(unittest.TestCase): @@ -847,6 +848,26 @@ def test_w_non_empty_list(self): converted = self._call_fut(fields, original) self.assertEqual(converted, {"one": "42", "two": "two"}) + def test_w_list_missing_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + + def test_w_list_too_many_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42, "two", "three"] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + def test_w_non_empty_dict(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), @@ -890,6 +911,16 @@ def test_w_explicit_none_value(self): # None values should be dropped regardless of the field type self.assertEqual(converted, {"one": "42"}) + def test_w_dict_unknown_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = {"whoami": "_?_?_", "one": 111, "two": "222"} + + with six.assertRaisesRegex(self, ValueError, r".*[Uu]nknown field.*whoami.*"): + self._call_fut(fields, original) + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): From dac6448c174c0e887d9aeebe0c9d926b5f58ead5 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 20 Jul 2020 17:10:43 +0200 Subject: [PATCH 2/3] Include unknown fields in request when inserting rows --- google/cloud/bigquery/_helpers.py | 16 +++++++++------- tests/unit/test__helpers.py | 10 +++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 2310224ae..47851d42c 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import datetime import decimal import re +import six from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -447,15 +448,16 @@ def _record_field_to_json(fields, row_value): if isdict: processed_fields.add(subname) - # Unknown fields should not be silently dropped. + # Unknown fields should not be silently dropped, include them. Since there + # is no schema information available for them, include them as strings + # to make them JSON-serializable. if isdict: not_processed = set(row_value.keys()) - processed_fields - if not_processed: - raise ValueError( - "Unknown field(s) not present in schema: {}".format( - ", ".join(not_processed) - ) - ) + + for field_name in not_processed: + value = row_value[field_name] + if value is not None: + record[field_name] = six.text_type(value) return record diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 4dd4a8520..c29bedd7b 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -916,10 +916,14 @@ def test_w_dict_unknown_fields(self): _make_field("INT64", name="one", mode="NULLABLE"), _make_field("STRING", name="two", mode="NULLABLE"), ] - original = {"whoami": "_?_?_", "one": 111, "two": "222"} + original = {"whoami": datetime.date(2020, 7, 20), "one": 111, "two": "222"} - with six.assertRaisesRegex(self, ValueError, r".*[Uu]nknown field.*whoami.*"): - self._call_fut(fields, original) + converted = self._call_fut(fields, original) + + # Unknown fields should be included, but converted as strings. + self.assertEqual( + converted, {"whoami": "2020-07-20", "one": "111", "two": "222"}, + ) class Test_field_to_json(unittest.TestCase): From db548537733c9c2c74829d28fce0aa3b0936acbc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 20 Jul 2020 18:08:12 +0200 Subject: [PATCH 3/3] Cover missed branch path in a test --- tests/unit/test__helpers.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index c29bedd7b..28ebe8144 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -916,11 +916,16 @@ def test_w_dict_unknown_fields(self): _make_field("INT64", name="one", mode="NULLABLE"), _make_field("STRING", name="two", mode="NULLABLE"), ] - original = {"whoami": datetime.date(2020, 7, 20), "one": 111, "two": "222"} + original = { + "whoami": datetime.date(2020, 7, 20), + "one": 111, + "two": "222", + "void": None, + } converted = self._call_fut(fields, original) - # Unknown fields should be included, but converted as strings. + # Unknown fields should be included (if not None), but converted as strings. self.assertEqual( converted, {"whoami": "2020-07-20", "one": "111", "two": "222"}, )