diff --git a/docs/reference.rst b/docs/reference.rst index d8738e67b..f4f11abc9 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -128,6 +128,7 @@ Schema :toctree: generated schema.SchemaField + schema.PolicyTagList Query diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index a7a0da3dd..931e0f3e6 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -88,6 +88,7 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row @@ -140,6 +141,7 @@ "RoutineReference", # Shared helpers "SchemaField", + "PolicyTagList", "UDFResource", "ExternalConfig", "BigtableOptions", diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 157db7ce6..5bad52273 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,12 +15,12 @@ """Schemas for BigQuery tables / queries.""" import collections -from typing import Optional +import enum +from typing import Iterable, Union from google.cloud.bigquery_v2 import types -_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -49,47 +49,62 @@ """String names of the legacy SQL types to integer codes of Standard SQL types.""" +class _DefaultSentinel(enum.Enum): + """Object used as 'sentinel' indicating default value should be used. + + Uses enum so that pytype/mypy knows that this is the only possible value. + https://stackoverflow.com/a/60605919/101923 + + Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8. + https://docs.python.org/3/library/typing.html#typing.Literal + """ + + DEFAULT_VALUE = object() + + +_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE + + class SchemaField(object): """Describe a single field within a table schema. Args: - name (str): The name of the field. + name: The name of the field. - field_type (str): The type of the field. See + field_type: + The type of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type - mode (Optional[str]): The mode of the field. See + mode: + Defaults to ``'NULLABLE'``. The mode of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode - description (Optional[str]): Description for the field. + description: Description for the field. - fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]): - Subfields (requires ``field_type`` of 'RECORD'). + fields: Subfields (requires ``field_type`` of 'RECORD'). - policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + policy_tags: The policy tag list for the field. - precision (Optional[int]): + precision: Precison (number of digits) of fields with NUMERIC or BIGNUMERIC type. - scale (Optional[int]): + scale: Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. - max_length (Optional[int]): - Maximim length of fields with STRING or BYTES type. - + max_length: Maximum length of fields with STRING or BYTES type. """ def __init__( self, - name, - field_type, - mode="NULLABLE", - description=_DEFAULT_VALUE, - fields=(), - policy_tags=None, - precision=_DEFAULT_VALUE, - scale=_DEFAULT_VALUE, - max_length=_DEFAULT_VALUE, + name: str, + field_type: str, + mode: str = "NULLABLE", + description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, + fields: Iterable["SchemaField"] = (), + policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, + precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, ): self._properties = { "name": name, @@ -105,28 +120,12 @@ def __init__( self._properties["scale"] = scale if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length + if policy_tags is not _DEFAULT_VALUE: + self._properties["policyTags"] = ( + policy_tags.to_api_repr() if policy_tags is not None else None + ) self._fields = tuple(fields) - self._policy_tags = self._determine_policy_tags(field_type, policy_tags) - - @staticmethod - def _determine_policy_tags( - field_type: str, given_policy_tags: Optional["PolicyTagList"] - ) -> Optional["PolicyTagList"]: - """Return the given policy tags, or their suitable representation if `None`. - - Args: - field_type: The type of the schema field. - given_policy_tags: The policy tags to maybe ajdust. - """ - if given_policy_tags is not None: - return given_policy_tags - - if field_type is not None and field_type.upper() in _STRUCT_TYPES: - return None - - return PolicyTagList() - @staticmethod def __get_int(api_repr, name): v = api_repr.get(name, _DEFAULT_VALUE) @@ -152,10 +151,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) - policy_tags = cls._determine_policy_tags( - field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) - ) + if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: + policy_tags = PolicyTagList.from_api_repr(policy_tags) return cls( field_type=field_type, @@ -230,7 +229,8 @@ def policy_tags(self): """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list definition for this field. """ - return self._policy_tags + resource = self._properties.get("policyTags") + return PolicyTagList.from_api_repr(resource) if resource is not None else None def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -244,10 +244,6 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - else: - # Explicitly include policy tag definition (we must not do it for RECORD - # fields, because those are not leaf fields). - answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. return answer @@ -272,7 +268,7 @@ def _key(self): field_type = f"{field_type}({self.precision})" policy_tags = ( - () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + () if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) return ( diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 6c8da4d23..f6f95c184 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -673,14 +673,15 @@ def test_unset_table_schema_attributes(self): mode=old_field.mode, description=None, fields=old_field.fields, - policy_tags=None, + policy_tags=PolicyTagList(), ) table.schema = new_schema updated_table = Config.CLIENT.update_table(table, ["schema"]) self.assertFalse(updated_table.schema[1].description) # Empty string or None. - self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + # policyTags key expected to be missing from response. + self.assertIsNone(updated_table.schema[1].policy_tags) def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index cbe087dac..5a0c5a83f 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -484,13 +484,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index d2a75413f..eb70470b5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1024,18 +1024,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query}, @@ -1069,18 +1059,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2003,6 +1983,7 @@ def test_update_routine(self): def test_update_table(self): from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables/%s" % ( @@ -2029,7 +2010,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -2040,7 +2020,15 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + # Explicly setting policyTags to no names should be included in the sent resource. + # https://github.com/googleapis/python-bigquery/issues/981 + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + description=None, + policy_tags=PolicyTagList(names=()), + ), SchemaField( "age", "INTEGER", mode="REQUIRED", description="New field description" ), @@ -2078,7 +2066,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -2197,21 +2184,14 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", - "policyTags": {"names": []}, - }, - { - "name": "country", - "type": "STRING", - "mode": "NULLABLE", - "policyTags": {"names": []}, }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6795,7 +6775,13 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) - assert field["policyTags"]["names"] == [] + # Avoid accidentally updating policy tags when not explicitly included. + # https://github.com/googleapis/python-bigquery/issues/981 + # Also, avoid 403 if someone has permission to write to table but + # not update policy tags by omitting policy tags we might have + # received from a get table request. + # https://github.com/googleapis/python-bigquery/pull/557 + assert "policyTags" not in field # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field @@ -8069,21 +8055,18 @@ def test_schema_to_json_with_file_path(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -8116,21 +8099,18 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 1f49dba5d..3dc9dd179 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -78,14 +78,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index d0b5ca54c..2180e1f6e 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -42,15 +42,40 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, PolicyTagList()) + self.assertIsNone(field.policy_tags) def test_constructor_explicit(self): - field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") self.assertEqual(field.mode, "REQUIRED") self.assertEqual(field.description, "Testing") self.assertEqual(field.fields, ()) + self.assertEqual( + field.policy_tags, + PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) + + def test_constructor_explicit_none(self): + field = self._make_one("test", "STRING", description=None, policy_tags=None) + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -66,20 +91,6 @@ def test_constructor_subfields(self): self.assertEqual(field.fields[0], sub_field1) self.assertEqual(field.fields[1], sub_field2) - def test_constructor_with_policy_tags(self): - from google.cloud.bigquery.schema import PolicyTagList - - policy = PolicyTagList(names=("foo", "bar")) - field = self._make_one( - "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy - ) - self.assertEqual(field.name, "test") - self.assertEqual(field.field_type, "STRING") - self.assertEqual(field.mode, "REQUIRED") - self.assertEqual(field.description, "Testing") - self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, policy) - def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -88,17 +99,28 @@ def test_to_api_repr(self): policy.to_api_repr(), {"names": ["foo", "bar"]}, ) - field = self._make_one("foo", "INTEGER", "NULLABLE", policy_tags=policy) + field = self._make_one( + "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + ) self.assertEqual( field.to_api_repr(), { "mode": "NULLABLE", "name": "foo", "type": "INTEGER", + "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, }, ) + def test_to_api_repr_omits_unset_properties(self): + # Prevent accidentally modifying fields that aren't explicitly set. + # https://github.com/googleapis/python-bigquery/issues/981 + field = self._make_one("foo", "INTEGER") + resource = field.to_api_repr() + self.assertNotIn("description", resource) + self.assertNotIn("policyTags", resource) + def test_to_api_repr_with_subfield(self): for record_type in ("RECORD", "STRUCT"): subfield = self._make_one("bar", "INTEGER", "NULLABLE") @@ -106,14 +128,7 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "policyTags": {"names": []}, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -163,9 +178,15 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.name, "foo") self.assertEqual(field.field_type, "RECORD") self.assertEqual(field.mode, "NULLABLE") - self.assertEqual(field.description, None) self.assertEqual(len(field.fields), 0) + # Keys not present in API representation shouldn't be included in + # _properties. + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) + self.assertNotIn("description", field._properties) + self.assertNotIn("policyTags", field._properties) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -567,22 +588,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ) def test_w_description(self): @@ -608,7 +617,6 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, - "policyTags": {"names": []}, }, ) self.assertEqual( @@ -618,7 +626,6 @@ def test_w_description(self): "type": "INTEGER", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, ) @@ -634,13 +641,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -649,18 +650,8 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) @@ -872,83 +863,43 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="NUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9,), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2,), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict( - name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40,), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2,), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="STRING", mode="NULLABLE"), ), ( dict(name="n", field_type="STRING", max_length=9), - dict( - name="n", - type="STRING", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9,), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="BYTES", mode="NULLABLE"), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict( - name="n", - type="BYTES", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9,), ), ], )