From 6bb694713e7e2eeb433bac1eb97446b37458fe0b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 19 May 2021 17:08:56 -0600 Subject: [PATCH 01/16] parse parameterized schema info --- google/cloud/bigquery/schema.py | 46 ++++++++++++++++++++++++++++++++- tests/unit/test_schema.py | 44 +++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index cb221d6de..eba61268b 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -77,6 +77,9 @@ def __init__( description=_DEFAULT_VALUE, fields=(), policy_tags=None, + precision=_DEFAULT_VALUE, + scale=_DEFAULT_VALUE, + maxLength=_DEFAULT_VALUE, ): self._properties = { "name": name, @@ -86,6 +89,12 @@ def __init__( self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: self._properties["description"] = description + if precision is not _DEFAULT_VALUE: + self._properties["precision"] = precision + if scale is not _DEFAULT_VALUE: + self._properties["scale"] = scale + if maxLength is not _DEFAULT_VALUE: + self._properties["maxLength"] = maxLength self._fields = tuple(fields) self._policy_tags = policy_tags @@ -148,6 +157,21 @@ def description(self): """Optional[str]: description for the field.""" return self._properties.get("description") + @property + def precision(self): + """Optional[str]: numeric precision for the field.""" + return self._properties.get("precision") + + @property + def scale(self): + """Optional[str]: numeric scale for the field.""" + return self._properties.get("scale") + + @property + def maxLength(self): + """Optional[str]: maximum string or bytes length for the field.""" + return self._properties.get("maxLength") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. 
@@ -259,6 +283,13 @@ def __repr__(self): return "SchemaField{}".format(self._key()) +def _get_int(f, name): + v = f.get(name) + if v is not None: + v = int(v) + return v + + def _parse_schema_resource(info): """Parse a resource fragment into a schema field. @@ -278,10 +309,23 @@ def _parse_schema_resource(info): field_type = r_field["type"] mode = r_field.get("mode", "NULLABLE") description = r_field.get("description") + precision = _get_int(r_field, "precision") + scale = _get_int(r_field, "scale") + maxLength = _get_int(r_field, "maxLength") sub_fields = _parse_schema_resource(r_field) policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags")) schema.append( - SchemaField(name, field_type, mode, description, sub_fields, policy_tags) + SchemaField( + name, + field_type, + mode, + description, + sub_fields, + policy_tags, + precision=precision, + scale=scale, + maxLength=maxLength, + ) ) return schema diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 87baaf379..07c56705a 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -715,3 +715,47 @@ def test___hash__not_equals(self): set_one = {policy1} set_two = {policy2} self.assertNotEqual(set_one, set_two) + + +def test_parse_numeric_plain(): + from google.cloud.bigquery.schema import _parse_schema_resource + + [field] = _parse_schema_resource({"fields": [{"name": "n", "type": "NUMERIC"}]}) + assert (field.name, field.field_type, field.precision, field.scale) == ( + "n", + "NUMERIC", + None, + None, + ) + + +def test_parse_numeric_parameterized(): + from google.cloud.bigquery.schema import _parse_schema_resource + + [field] = _parse_schema_resource( + {"fields": [{"name": "n", "type": "NUMERIC", "precision": "10", "scale": "2"}]} + ) + assert (field.name, field.field_type, field.precision, field.scale) == ( + "n", + "NUMERIC", + 10, + 2, + ) + [field] = _parse_schema_resource( + {"fields": [{"name": "n", "type": "NUMERIC", "precision": "10"}]} + ) + assert 
(field.name, field.field_type, field.precision, field.scale) == ( + "n", + "NUMERIC", + 10, + None, + ) + + +def test_parse_string_parameterized(): + from google.cloud.bigquery.schema import _parse_schema_resource + + [field] = _parse_schema_resource( + {"fields": [{"name": "n", "type": "STRING", "maxLength": "2"}]} + ) + assert (field.name, field.field_type, field.maxLength) == ("n", "STRING", 2) From f046fed01a1b7c381ada325c8210610d97159b7a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 19 May 2021 17:31:05 -0600 Subject: [PATCH 02/16] Fixed SchemaField repr/key --- google/cloud/bigquery/schema.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index eba61268b..28fddd11c 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -215,9 +215,19 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ + field_type = self.field_type.upper() + if field_type == 'STRING': + if self.maxLength is not None: + field_type = f"STRING({self.maxLength})" + elif field_type == 'NUMERIC': + if self.precision is not None: + if self.scale is not None: + field_type = f"NUMERIC({self.precision}, {self.scale})" + else: + field_type = f"NUMERIC({self.precision})" return ( self.name, - self.field_type.upper(), + field_type, # Mode is always str, if not given it defaults to a str value self.mode.upper(), # pytype: disable=attribute-error self.description, From d8199f0f21eab937c086609dd36e84e440060d2a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 07:59:53 -0600 Subject: [PATCH 03/16] Fix code duplication between _parse_schema_resource and from_api_repr Move new parameterized-type code from _parse_schema_resource to from_api_repr and implement _parse_schema_resource in terms of from_api_repr. 
--- google/cloud/bigquery/schema.py | 36 ++++++--------------------------- 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 28fddd11c..791fded2c 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -122,6 +122,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": description=description, name=api_repr["name"], policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), + precision=_get_int(api_repr, "precision"), + scale=_get_int(api_repr, "scale"), + maxLength=_get_int(api_repr, "maxLength"), ) @property @@ -294,8 +297,8 @@ def __repr__(self): def _get_int(f, name): - v = f.get(name) - if v is not None: + v = f.get(name, _DEFAULT_VALUE) + if v is not _DEFAULT_VALUE: v = int(v) return v @@ -310,34 +313,7 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. """ - if "fields" not in info: - return () - - schema = [] - for r_field in info["fields"]: - name = r_field["name"] - field_type = r_field["type"] - mode = r_field.get("mode", "NULLABLE") - description = r_field.get("description") - precision = _get_int(r_field, "precision") - scale = _get_int(r_field, "scale") - maxLength = _get_int(r_field, "maxLength") - sub_fields = _parse_schema_resource(r_field) - policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags")) - schema.append( - SchemaField( - name, - field_type, - mode, - description, - sub_fields, - policy_tags, - precision=precision, - scale=scale, - maxLength=maxLength, - ) - ) - return schema + return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] def _build_schema_resource(fields): From 8273afe2d4e89a87e5375493189126e6119ab98f Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 08:00:45 -0600 Subject: [PATCH 04/16] empty schemas are lists now, just like non-empty schemas. 
--- tests/unit/test_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 90fc30b20..9483fe8dd 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1302,7 +1302,7 @@ def _verifySchema(self, query, resource): self.assertEqual(found.description, expected.get("description")) self.assertEqual(found.fields, expected.get("fields", ())) else: - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) def test_ctor_defaults(self): query = self._make_one(self._make_resource()) @@ -1312,7 +1312,7 @@ def test_ctor_defaults(self): self.assertIsNone(query.page_token) self.assertEqual(query.project, self.PROJECT) self.assertEqual(query.rows, []) - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) From acc6f463366ed5e4e91b10e09245ce2da3a5ce53 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 08:03:18 -0600 Subject: [PATCH 05/16] changed new parameterized-type tests to use from_api_repr Because that's more direct and it uncovered duplicate code. 
--- tests/unit/test_schema.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 07c56705a..80ea50059 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -718,9 +718,9 @@ def test___hash__not_equals(self): def test_parse_numeric_plain(): - from google.cloud.bigquery.schema import _parse_schema_resource + from google.cloud.bigquery.schema import SchemaField - [field] = _parse_schema_resource({"fields": [{"name": "n", "type": "NUMERIC"}]}) + field = SchemaField.from_api_repr({"name": "n", "type": "NUMERIC"}) assert (field.name, field.field_type, field.precision, field.scale) == ( "n", "NUMERIC", @@ -730,10 +730,10 @@ def test_parse_numeric_plain(): def test_parse_numeric_parameterized(): - from google.cloud.bigquery.schema import _parse_schema_resource + from google.cloud.bigquery.schema import SchemaField - [field] = _parse_schema_resource( - {"fields": [{"name": "n", "type": "NUMERIC", "precision": "10", "scale": "2"}]} + field = SchemaField.from_api_repr( + {"name": "n", "type": "NUMERIC", "precision": "10", "scale": "2"} ) assert (field.name, field.field_type, field.precision, field.scale) == ( "n", @@ -741,8 +741,8 @@ def test_parse_numeric_parameterized(): 10, 2, ) - [field] = _parse_schema_resource( - {"fields": [{"name": "n", "type": "NUMERIC", "precision": "10"}]} + field = SchemaField.from_api_repr( + {"name": "n", "type": "NUMERIC", "precision": "10"} ) assert (field.name, field.field_type, field.precision, field.scale) == ( "n", @@ -753,9 +753,7 @@ def test_parse_numeric_parameterized(): def test_parse_string_parameterized(): - from google.cloud.bigquery.schema import _parse_schema_resource + from google.cloud.bigquery.schema import SchemaField - [field] = _parse_schema_resource( - {"fields": [{"name": "n", "type": "STRING", "maxLength": "2"}]} - ) + field = SchemaField.from_api_repr({"name": "n", "type": "STRING", "maxLength": "2"}) 
assert (field.name, field.field_type, field.maxLength) == ("n", "STRING", 2) From c01bc1ae1d138faadbc7c500b01dd780bdf96aa0 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 08:33:12 -0600 Subject: [PATCH 06/16] parameterized the from_api_repr tests and added to_api_repr tests --- tests/unit/test_schema.py | 84 ++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 80ea50059..2beec4d22 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -15,6 +15,7 @@ import unittest import mock +import pytest class TestSchemaField(unittest.TestCase): @@ -717,43 +718,54 @@ def test___hash__not_equals(self): self.assertNotEqual(set_one, set_two) -def test_parse_numeric_plain(): - from google.cloud.bigquery.schema import SchemaField - - field = SchemaField.from_api_repr({"name": "n", "type": "NUMERIC"}) - assert (field.name, field.field_type, field.precision, field.scale) == ( - "n", - "NUMERIC", - None, - None, - ) - - -def test_parse_numeric_parameterized(): +@pytest.mark.parametrize( + "api,expect", + [ + ( + dict(name='n', type='NUMERIC'), + ('n', 'NUMERIC', None, None, None), + ), + ( + dict(name='n', type='NUMERIC', precision=9), + ('n', 'NUMERIC', 9, None, None), + ), + ( + dict(name='n', type='NUMERIC', precision=9, scale=2), + ('n', 'NUMERIC', 9, 2, None), + ), + ( + dict(name='n', type='STRING'), + ('n', 'STRING', None, None, None), + ), + ( + dict(name='n', type='STRING', maxLength=9), + ('n', 'STRING', None, None, 9), + ), + ]) +def test_from_api_repr_parameterized(api, expect): from google.cloud.bigquery.schema import SchemaField - field = SchemaField.from_api_repr( - {"name": "n", "type": "NUMERIC", "precision": "10", "scale": "2"} - ) - assert (field.name, field.field_type, field.precision, field.scale) == ( - "n", - "NUMERIC", - 10, - 2, - ) - field = SchemaField.from_api_repr( - {"name": "n", "type": "NUMERIC", "precision": 
"10"} - ) - assert (field.name, field.field_type, field.precision, field.scale) == ( - "n", - "NUMERIC", - 10, - None, - ) - - -def test_parse_string_parameterized(): + field = SchemaField.from_api_repr(api) + + assert ((field.name, field.field_type, field.precision, field.scale, field.maxLength) + == expect) + + +@pytest.mark.parametrize( + "field,api", + [ + (dict(name='n', field_type='NUMERIC'), + dict(name='n', type='NUMERIC', mode='NULLABLE')), + (dict(name='n', field_type='NUMERIC', precision=9), + dict(name='n', type='NUMERIC', mode='NULLABLE', precision=9)), + (dict(name='n', field_type='NUMERIC', precision=9, scale=2), + dict(name='n', type='NUMERIC', mode='NULLABLE', precision=9, scale=2)), + (dict(name='n', field_type='STRING'), + dict(name='n', type='STRING', mode='NULLABLE')), + (dict(name='n', field_type='STRING', maxLength=9), + dict(name='n', type='STRING', mode='NULLABLE', maxLength=9)), + ]) +def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField - field = SchemaField.from_api_repr({"name": "n", "type": "STRING", "maxLength": "2"}) - assert (field.name, field.field_type, field.maxLength) == ("n", "STRING", 2) + assert SchemaField(**field).to_api_repr() == api From 2ab558cb1d5937ba03944bf98a567080cca20757 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 09:02:48 -0600 Subject: [PATCH 07/16] Test BYTES and _key (repr) too. 
--- tests/unit/test_schema.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 2beec4d22..0770da282 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -719,30 +719,45 @@ def test___hash__not_equals(self): @pytest.mark.parametrize( - "api,expect", + "api,expect,key2", [ ( dict(name='n', type='NUMERIC'), ('n', 'NUMERIC', None, None, None), + ('n', 'NUMERIC'), ), ( dict(name='n', type='NUMERIC', precision=9), ('n', 'NUMERIC', 9, None, None), + ('n', 'NUMERIC(9)'), ), ( dict(name='n', type='NUMERIC', precision=9, scale=2), ('n', 'NUMERIC', 9, 2, None), + ('n', 'NUMERIC(9, 2)'), ), ( dict(name='n', type='STRING'), ('n', 'STRING', None, None, None), + ('n', 'STRING'), ), ( dict(name='n', type='STRING', maxLength=9), ('n', 'STRING', None, None, 9), + ('n', 'STRING(9)'), + ), + ( + dict(name='n', type='BYTES'), + ('n', 'BYTES', None, None, None), + ('n', 'BYTES'), + ), + ( + dict(name='n', type='BYTES', maxLength=9), + ('n', 'BYTES', None, None, 9), + ('n', 'BYTES(9)'), ), ]) -def test_from_api_repr_parameterized(api, expect): +def test_from_api_repr_parameterized(api, expect, key2): from google.cloud.bigquery.schema import SchemaField field = SchemaField.from_api_repr(api) @@ -750,6 +765,8 @@ def test_from_api_repr_parameterized(api, expect): assert ((field.name, field.field_type, field.precision, field.scale, field.maxLength) == expect) + assert field._key()[:2] == key2 + @pytest.mark.parametrize( "field,api", @@ -764,6 +781,10 @@ def test_from_api_repr_parameterized(api, expect): dict(name='n', type='STRING', mode='NULLABLE')), (dict(name='n', field_type='STRING', maxLength=9), dict(name='n', type='STRING', mode='NULLABLE', maxLength=9)), + (dict(name='n', field_type='BYTES'), + dict(name='n', type='BYTES', mode='NULLABLE')), + (dict(name='n', field_type='BYTES', maxLength=9), + dict(name='n', type='BYTES', mode='NULLABLE', maxLength=9)), ]) def 
test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField From 2d021ae5384f6e6bf6ec17d0551620e62fd29b30 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 09:18:08 -0600 Subject: [PATCH 08/16] Added a round-trip parameterized types schema tests --- tests/system/test_client.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 7c8ef50fa..70889430f 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2173,6 +2173,32 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) + def test_parameterized_types_round_trip(self): + client = Config.CLIENT + table_id = f"{Config.DATASET}.test_parameterized_types_round_trip" + fields = ( + ("n", "NUMERIC"), + ("n9", "NUMERIC(9)"), + ("n92", "NUMERIC(9, 2)"), + ("s", "STRING"), + ("s9", "STRING(9)"), + ("b", "BYTES"), + ("b9", "BYTES(9)"), + ) + client.query( + "create table {} ({})".format( + table_id, ", ".join(" ".join(f) for f in fields) + ) + ).result() + table = client.get_table(table_id) + self.to_delete.insert(0, table) + table_id2 = table_id + "2" + client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) + table2 = client.get_table(table_id2) + self.to_delete.insert(0, table2) + + self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) + def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) From 4a1e4b719b93b6a75281747e91e7a0991a743530 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 09:20:00 -0600 Subject: [PATCH 09/16] handle BYTES in _key/repr --- google/cloud/bigquery/schema.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 791fded2c..f10c8e917 100644 --- 
a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -219,10 +219,10 @@ def _key(self): Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ field_type = self.field_type.upper() - if field_type == 'STRING': + if field_type == "STRING" or field_type == "BYTES": if self.maxLength is not None: - field_type = f"STRING({self.maxLength})" - elif field_type == 'NUMERIC': + field_type = f"{field_type}({self.maxLength})" + elif field_type == "NUMERIC": if self.precision is not None: if self.scale is not None: field_type = f"NUMERIC({self.precision}, {self.scale})" From c4056b03ee3d94f1ffce4c62be5ca7687530d839 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 09:20:17 -0600 Subject: [PATCH 10/16] blacken --- tests/unit/test_schema.py | 113 ++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 46 deletions(-) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 0770da282..867ef71e0 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -722,48 +722,54 @@ def test___hash__not_equals(self): "api,expect,key2", [ ( - dict(name='n', type='NUMERIC'), - ('n', 'NUMERIC', None, None, None), - ('n', 'NUMERIC'), - ), + dict(name="n", type="NUMERIC"), + ("n", "NUMERIC", None, None, None), + ("n", "NUMERIC"), + ), ( - dict(name='n', type='NUMERIC', precision=9), - ('n', 'NUMERIC', 9, None, None), - ('n', 'NUMERIC(9)'), - ), + dict(name="n", type="NUMERIC", precision=9), + ("n", "NUMERIC", 9, None, None), + ("n", "NUMERIC(9)"), + ), ( - dict(name='n', type='NUMERIC', precision=9, scale=2), - ('n', 'NUMERIC', 9, 2, None), - ('n', 'NUMERIC(9, 2)'), - ), + dict(name="n", type="NUMERIC", precision=9, scale=2), + ("n", "NUMERIC", 9, 2, None), + ("n", "NUMERIC(9, 2)"), + ), ( - dict(name='n', type='STRING'), - ('n', 'STRING', None, None, None), - ('n', 'STRING'), - ), + dict(name="n", type="STRING"), + ("n", "STRING", None, None, None), + ("n", "STRING"), + ), ( - 
dict(name='n', type='STRING', maxLength=9), - ('n', 'STRING', None, None, 9), - ('n', 'STRING(9)'), - ), + dict(name="n", type="STRING", maxLength=9), + ("n", "STRING", None, None, 9), + ("n", "STRING(9)"), + ), ( - dict(name='n', type='BYTES'), - ('n', 'BYTES', None, None, None), - ('n', 'BYTES'), - ), + dict(name="n", type="BYTES"), + ("n", "BYTES", None, None, None), + ("n", "BYTES"), + ), ( - dict(name='n', type='BYTES', maxLength=9), - ('n', 'BYTES', None, None, 9), - ('n', 'BYTES(9)'), - ), - ]) + dict(name="n", type="BYTES", maxLength=9), + ("n", "BYTES", None, None, 9), + ("n", "BYTES(9)"), + ), + ], +) def test_from_api_repr_parameterized(api, expect, key2): from google.cloud.bigquery.schema import SchemaField field = SchemaField.from_api_repr(api) - assert ((field.name, field.field_type, field.precision, field.scale, field.maxLength) - == expect) + assert ( + field.name, + field.field_type, + field.precision, + field.scale, + field.maxLength, + ) == expect assert field._key()[:2] == key2 @@ -771,21 +777,36 @@ def test_from_api_repr_parameterized(api, expect, key2): @pytest.mark.parametrize( "field,api", [ - (dict(name='n', field_type='NUMERIC'), - dict(name='n', type='NUMERIC', mode='NULLABLE')), - (dict(name='n', field_type='NUMERIC', precision=9), - dict(name='n', type='NUMERIC', mode='NULLABLE', precision=9)), - (dict(name='n', field_type='NUMERIC', precision=9, scale=2), - dict(name='n', type='NUMERIC', mode='NULLABLE', precision=9, scale=2)), - (dict(name='n', field_type='STRING'), - dict(name='n', type='STRING', mode='NULLABLE')), - (dict(name='n', field_type='STRING', maxLength=9), - dict(name='n', type='STRING', mode='NULLABLE', maxLength=9)), - (dict(name='n', field_type='BYTES'), - dict(name='n', type='BYTES', mode='NULLABLE')), - (dict(name='n', field_type='BYTES', maxLength=9), - dict(name='n', type='BYTES', mode='NULLABLE', maxLength=9)), - ]) + ( + dict(name="n", field_type="NUMERIC"), + dict(name="n", type="NUMERIC", mode="NULLABLE"), + ), 
+ ( + dict(name="n", field_type="NUMERIC", precision=9), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9), + ), + ( + dict(name="n", field_type="NUMERIC", precision=9, scale=2), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), + ), + ( + dict(name="n", field_type="STRING"), + dict(name="n", type="STRING", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="STRING", maxLength=9), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), + ), + ( + dict(name="n", field_type="BYTES"), + dict(name="n", type="BYTES", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BYTES", maxLength=9), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), + ), + ], +) def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField From a238036cd058119a37d42994f9c3002d5122695e Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 10:51:31 -0600 Subject: [PATCH 11/16] Move _get_int close to use --- google/cloud/bigquery/schema.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f10c8e917..379dc2751 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -98,6 +98,13 @@ def __init__( self._fields = tuple(fields) self._policy_tags = policy_tags + @staticmethod + def __get_int(f, name): + v = f.get(name, _DEFAULT_VALUE) + if v is not _DEFAULT_VALUE: + v = int(v) + return v + @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. 
@@ -122,9 +129,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": description=description, name=api_repr["name"], policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), - precision=_get_int(api_repr, "precision"), - scale=_get_int(api_repr, "scale"), - maxLength=_get_int(api_repr, "maxLength"), + precision=cls.__get_int(api_repr, "precision"), + scale=cls.__get_int(api_repr, "scale"), + maxLength=cls.__get_int(api_repr, "maxLength"), ) @property @@ -296,13 +303,6 @@ def __repr__(self): return "SchemaField{}".format(self._key()) -def _get_int(f, name): - v = f.get(name, _DEFAULT_VALUE) - if v is not _DEFAULT_VALUE: - v = int(v) - return v - - def _parse_schema_resource(info): """Parse a resource fragment into a schema field. From 554696be362a00007cc21424ce932ba5c2c34c51 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 11:03:27 -0600 Subject: [PATCH 12/16] Updated documentation. --- google/cloud/bigquery/schema.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 379dc2751..d003b417d 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -67,6 +67,15 @@ class SchemaField(object): policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + precision (Optional[int]): + Precison (number of digits) of fields with NUMERIC type. + + scale (Optional[int]): + Scale (digits after decimal) of fields with NUMERIC type. + + maxLength (Optional[int]): + Maximim length of fields with STRING or BYTES type. 
+ """ def __init__( @@ -169,17 +178,17 @@ def description(self): @property def precision(self): - """Optional[str]: numeric precision for the field.""" + """Optional[int]: Precision (number of digits) for the NUMERIC field.""" return self._properties.get("precision") @property def scale(self): - """Optional[str]: numeric scale for the field.""" + """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" return self._properties.get("scale") @property def maxLength(self): - """Optional[str]: maximum string or bytes length for the field.""" + """Optional[int]: Maximum length for the STRING or BYTES field.""" return self._properties.get("maxLength") @property From 9f5cbd5c3dfc72c449221897940c6776cbc6029a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 20 May 2021 15:36:02 -0600 Subject: [PATCH 13/16] Oops, forgot BIGNUMERIC --- google/cloud/bigquery/schema.py | 6 +++--- tests/system/test_client.py | 3 +++ tests/unit/test_schema.py | 27 +++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index d003b417d..83713bbba 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -238,12 +238,12 @@ def _key(self): if field_type == "STRING" or field_type == "BYTES": if self.maxLength is not None: field_type = f"{field_type}({self.maxLength})" - elif field_type == "NUMERIC": + elif field_type.endswith("NUMERIC"): if self.precision is not None: if self.scale is not None: - field_type = f"NUMERIC({self.precision}, {self.scale})" + field_type = f"{field_type}({self.precision}, {self.scale})" else: - field_type = f"NUMERIC({self.precision})" + field_type = f"{field_type}({self.precision})" return ( self.name, field_type, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 70889430f..61b679e1a 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2180,6 +2180,9 @@ def 
test_parameterized_types_round_trip(self): ("n", "NUMERIC"), ("n9", "NUMERIC(9)"), ("n92", "NUMERIC(9, 2)"), + ("bn", "BIGNUMERIC"), + ("bn9", "BIGNUMERIC(38)"), + ("bn92", "BIGNUMERIC(38, 22)"), ("s", "STRING"), ("s9", "STRING(9)"), ("b", "BYTES"), diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 867ef71e0..77ea4f510 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -736,6 +736,21 @@ def test___hash__not_equals(self): ("n", "NUMERIC", 9, 2, None), ("n", "NUMERIC(9, 2)"), ), + ( + dict(name="n", type="BIGNUMERIC"), + ("n", "BIGNUMERIC", None, None, None), + ("n", "BIGNUMERIC"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40), + ("n", "BIGNUMERIC", 40, None, None), + ("n", "BIGNUMERIC(40)"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40, scale=2), + ("n", "BIGNUMERIC", 40, 2, None), + ("n", "BIGNUMERIC(40, 2)"), + ), ( dict(name="n", type="STRING"), ("n", "STRING", None, None, None), @@ -789,6 +804,18 @@ def test_from_api_repr_parameterized(api, expect, key2): dict(name="n", field_type="NUMERIC", precision=9, scale=2), dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), ), + ( + dict(name="n", field_type="BIGNUMERIC"), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2), + ), ( dict(name="n", field_type="STRING"), dict(name="n", type="STRING", mode="NULLABLE"), From 32226e1779c613593525dd4c1e1c738495ec52f0 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 21 May 2021 09:15:23 -0600 Subject: [PATCH 14/16] Improve argument doc and better argument name to __get_int --- google/cloud/bigquery/schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 83713bbba..d2bf6e9f3 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -68,10 +68,10 @@ class SchemaField(object): policy_tags (Optional[PolicyTagList]): The policy tag list for the field. precision (Optional[int]): - Precison (number of digits) of fields with NUMERIC type. + Precison (number of digits) of fields with NUMERIC or BIGNUMERIC type. scale (Optional[int]): - Scale (digits after decimal) of fields with NUMERIC type. + Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. maxLength (Optional[int]): Maximim length of fields with STRING or BYTES type. @@ -108,8 +108,8 @@ def __init__( self._policy_tags = policy_tags @staticmethod - def __get_int(f, name): - v = f.get(name, _DEFAULT_VALUE) + def __get_int(api_repr, name): + v = api_repr.get(name, _DEFAULT_VALUE) if v is not _DEFAULT_VALUE: v = int(v) return v From 01b714b69940901ae25356cabeeb7b6c624effb6 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 21 May 2021 09:33:55 -0600 Subject: [PATCH 15/16] doom tables before creating them. 
--- tests/system/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 61b679e1a..b4b0c053d 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2188,17 +2188,17 @@ def test_parameterized_types_round_trip(self): ("b", "BYTES"), ("b9", "BYTES(9)"), ) + self.to_delete.insert(0, Table(f"{client.project}.{table_id}")) client.query( "create table {} ({})".format( table_id, ", ".join(" ".join(f) for f in fields) ) ).result() table = client.get_table(table_id) - self.to_delete.insert(0, table) table_id2 = table_id + "2" + self.to_delete.insert(0, Table(f"{client.project}.{table_id2}")) client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) table2 = client.get_table(table_id2) - self.to_delete.insert(0, table2) self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) From cac957bd2c9117dc288c0c43a5fe0308e61279c8 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 21 May 2021 09:59:47 -0600 Subject: [PATCH 16/16] Use max_length in the Python for the REST api maxLength --- google/cloud/bigquery/schema.py | 16 ++++++++-------- tests/unit/test_schema.py | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index d2bf6e9f3..919d78b23 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -73,7 +73,7 @@ class SchemaField(object): scale (Optional[int]): Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. - maxLength (Optional[int]): + max_length (Optional[int]): Maximim length of fields with STRING or BYTES type. 
""" @@ -88,7 +88,7 @@ def __init__( policy_tags=None, precision=_DEFAULT_VALUE, scale=_DEFAULT_VALUE, - maxLength=_DEFAULT_VALUE, + max_length=_DEFAULT_VALUE, ): self._properties = { "name": name, @@ -102,8 +102,8 @@ def __init__( self._properties["precision"] = precision if scale is not _DEFAULT_VALUE: self._properties["scale"] = scale - if maxLength is not _DEFAULT_VALUE: - self._properties["maxLength"] = maxLength + if max_length is not _DEFAULT_VALUE: + self._properties["maxLength"] = max_length self._fields = tuple(fields) self._policy_tags = policy_tags @@ -140,7 +140,7 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), precision=cls.__get_int(api_repr, "precision"), scale=cls.__get_int(api_repr, "scale"), - maxLength=cls.__get_int(api_repr, "maxLength"), + max_length=cls.__get_int(api_repr, "maxLength"), ) @property @@ -187,7 +187,7 @@ def scale(self): return self._properties.get("scale") @property - def maxLength(self): + def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" return self._properties.get("maxLength") @@ -236,8 +236,8 @@ def _key(self): """ field_type = self.field_type.upper() if field_type == "STRING" or field_type == "BYTES": - if self.maxLength is not None: - field_type = f"{field_type}({self.maxLength})" + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" elif field_type.endswith("NUMERIC"): if self.precision is not None: if self.scale is not None: diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 77ea4f510..29c3bace5 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -783,7 +783,7 @@ def test_from_api_repr_parameterized(api, expect, key2): field.field_type, field.precision, field.scale, - field.maxLength, + field.max_length, ) == expect assert field._key()[:2] == key2 @@ -821,7 +821,7 @@ def test_from_api_repr_parameterized(api, expect, key2): 
dict(name="n", type="STRING", mode="NULLABLE"), ), ( - dict(name="n", field_type="STRING", maxLength=9), + dict(name="n", field_type="STRING", max_length=9), dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), ), ( @@ -829,7 +829,7 @@ def test_from_api_repr_parameterized(api, expect, key2): dict(name="n", type="BYTES", mode="NULLABLE"), ), ( - dict(name="n", field_type="BYTES", maxLength=9), + dict(name="n", field_type="BYTES", max_length=9), dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), ), ],