From 748cceb017d2cb050873b2b9989b73d95d2c04f1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 19 Jan 2021 13:26:00 +0100 Subject: [PATCH 01/10] fix: error using empty array of structs parameter --- google/cloud/bigquery/query.py | 47 +++++++++++++++++++++++++++------- tests/system.py | 15 +++++++++++ tests/unit/test_query.py | 29 +++++++++++++++++++++ 3 files changed, 82 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f2ed6337e..83009d1a4 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -186,31 +186,56 @@ class ArrayQueryParameter(_AbstractQueryParameter): array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. - - values (List[appropriate scalar type]): The parameter array values. + `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, + or `'STRUCT'`/`'RECORD'`. + + values (List[appropriate type]): The parameter array values. + + struct_item_type (Optional[google.cloud.bigquery.query.StructQueryParameter]): + The type of array elements. The argument is generally not used, but is + required if ``array_type`` is ``'STRUCT'``/``'RECORD'`` and ``values`` + is empty. + This is because the backend requires detailed type information about + array elements, but that cannot be determined for ``'STRUCT'`` items + if there are no elements in the array. 
""" - def __init__(self, name, array_type, values): + def __init__(self, name, array_type, values, struct_item_type=None): self.name = name self.array_type = array_type self.values = values + if not values and array_type in {"RECORD", "STRUCT"}: + if struct_item_type is None: + raise ValueError("Missing struct item type info for an empty array.") + self._struct_item_type_api = struct_item_type.to_api_repr()["parameterType"] + else: + self._struct_item_type_api = None # won't be used + @classmethod - def positional(cls, array_type, values): + def positional(cls, array_type, values, struct_item_type=None): """Factory for positional parameters. Args: array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, + or `'STRUCT'`/`'RECORD'`. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. + + struct_item_type (Optional[google.cloud.bigquery.query.StructQueryParameter]): + The type of array elements. The argument is generally not used, but is + required if ``array_type`` is ``'STRUCT'``/``'RECORD'`` and ``values`` + is empty. + This is because the backend requires detailed type information about + array elements, but that cannot be determined for ``'STRUCT'`` items + if there are no elements in the array. 
Returns: google.cloud.bigquery.query.ArrayQueryParameter: Instance without name """ - return cls(None, array_type, values) + return cls(None, array_type, values, struct_item_type=struct_item_type) @classmethod def _from_api_repr_struct(cls, resource): @@ -265,8 +290,12 @@ def to_api_repr(self): values = self.values if self.array_type == "RECORD" or self.array_type == "STRUCT": reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]["parameterType"] a_values = [repr_["parameterValue"] for repr_ in reprs] + + if reprs: + a_type = reprs[0]["parameterType"] + else: + a_type = self._struct_item_type_api else: a_type = {"type": self.array_type} converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) diff --git a/tests/system.py b/tests/system.py index 0fa5bc41e..8e074b1dc 100644 --- a/tests/system.py +++ b/tests/system.py @@ -2203,6 +2203,16 @@ def test_query_w_query_params(self): characters_param = ArrayQueryParameter( name=None, array_type="RECORD", values=[phred_param, bharney_param] ) + empty_struct_array_param = ArrayQueryParameter( + name="empty_array_param", + array_type="RECORD", + values=[], + struct_item_type=StructQueryParameter( + None, + ScalarQueryParameter(name="foo", type_="INT64", value=None), + ScalarQueryParameter(name="bar", type_="STRING", value=None), + ), + ) hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) sidekick_param = StructQueryParameter( "sidekick", bharney_name_param, bharney_age_param @@ -2293,6 +2303,11 @@ def test_query_w_query_params(self): ], "query_parameters": [characters_param], }, + { + "sql": "SELECT @empty_array_param", + "expected": [], + "query_parameters": [empty_struct_array_param], + }, { "sql": "SELECT @roles", "expected": { diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index cf268daf1..66ad018ad 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -330,6 +330,10 @@ def test_ctor(self): self.assertEqual(param.array_type, "INT64") 
self.assertEqual(param.values, [1, 2]) + def test_ctor_empty_struct_array_wo_type_info(self): + with self.assertRaisesRegex(ValueError, r"(?i)missing.*struct.*type info.*"): + self._make_one(name="foo", array_type="STRUCT", values=[]) + def test___eq__(self): param = self._make_one(name="foo", array_type="INT64", values=[123]) self.assertEqual(param, param) @@ -493,6 +497,31 @@ def test_to_api_repr_w_record_type(self): param = klass.positional(array_type="RECORD", values=[struct]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_empty_array_of_records_type(self): + from google.cloud.bigquery.query import StructQueryParameter + + EXPECTED = { + "parameterType": { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + {"name": "foo", "type": {"type": "STRING"}}, + {"name": "bar", "type": {"type": "INT64"}}, + ], + }, + }, + "parameterValue": {"arrayValues": []}, + } + one = _make_subparam("foo", "STRING", None) + another = _make_subparam("bar", "INT64", None) + struct = StructQueryParameter.positional(one, another) + klass = self._get_target_class() + param = klass.positional( + array_type="RECORD", values=[], struct_item_type=struct + ) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING", ["value"]) other = object() From 7db73c9886b1efb867deb863a7f7cfaca0c559dc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 3 Feb 2021 11:42:00 +0100 Subject: [PATCH 02/10] Add QueryParameterType classes --- google/cloud/bigquery/__init__.py | 6 + google/cloud/bigquery/query.py | 220 +++++++++++++++++++++ tests/unit/test_query.py | 308 ++++++++++++++++++++++++++++++ 3 files changed, 534 insertions(+) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 41f987228..891f3c046 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -66,8 +66,11 @@ from google.cloud.bigquery.model import Model 
from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ArrayQueryParameterType from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import StructQueryParameterType from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import Routine @@ -92,6 +95,9 @@ "ArrayQueryParameter", "ScalarQueryParameter", "StructQueryParameter", + "ArrayQueryParameterType", + "ScalarQueryParameterType", + "StructQueryParameterType", # Datasets "Dataset", "DatasetReference", diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 83009d1a4..5fe3b1628 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -48,6 +48,226 @@ def __ne__(self, other): return not self == other +class _AbstractQueryParameterType: + """Base class for representing query parameter types. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#queryparametertype + """ + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.QueryParameterType: Instance + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + raise NotImplementedError + + +class ScalarQueryParameterType(_AbstractQueryParameterType): + """Type representation for scalar query parameters. + + Args: + type_ (str): + One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', + 'DATETIME', or 'DATE'. + name (Optional[str]): + The name of the query parameter. 
Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, type_, *, name=None, description=None): + self._type = type_ + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: Instance + """ + type_ = resource["type"] + return cls(type_) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + return {"type": self._type} + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._type!r}{name}{description})" + + +class ArrayQueryParameterType(_AbstractQueryParameterType): + """Type representation for array query parameters. + + Args: + array_type (Union[ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, array_type, *, name=None, description=None): + self._array_type = array_type + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. 
+ + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ArrayQueryParameterType: Instance + """ + array_item_type = resource["arrayType"]["type"] + + if array_item_type in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + else: + klass = ScalarQueryParameterType + + item_type_instance = klass.from_api_repr(resource["arrayType"]) + return cls(item_type_instance) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + return { + "type": "ARRAY", + "arrayType": self._array_type.to_api_repr(), + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._array_type!r}{name}{description})" + + +class StructQueryParameterType(_AbstractQueryParameterType): + """Type representation for struct query parameters. + + Args: + subtypes (Iterable[Union[ \ + ArrayQueryParameterType, ScalarQueryParameterType, StructQueryParameterType \ + ]]): + An non-empty iterable describing the struct's field types. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, *subtypes, name=None, description=None): + self._subtypes = [type_ for type_ in subtypes] # make a shallow copy + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. 
+ + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.StructQueryParameterType: Instance + """ + subtypes = [] + + for struct_subtype in resource["structTypes"]: + type_repr = struct_subtype["type"] + if type_repr["type"] in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + elif type_repr["type"] == "ARRAY": + klass = ArrayQueryParameterType + else: + klass = ScalarQueryParameterType + + type_instance = klass.from_api_repr(type_repr) + type_instance.name = struct_subtype.get("name") + type_instance.description = struct_subtype.get("description") + subtypes.append(type_instance) + + return cls(*subtypes) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + struct_types = [] + + for subtype in self._subtypes: + item = {"type": subtype.to_api_repr()} + if subtype.name is not None: + item["name"] = subtype.name + if subtype.description is not None: + item["description"] = subtype.description + + struct_types.append(item) + + return { + "type": "STRUCT", + "structTypes": struct_types, + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + items = ", ".join(repr(subtype) for subtype in self._subtypes) + return f"{self.__class__.__name__}({items}{name}{description})" + + class _AbstractQueryParameter(object): """Base class for named / positional query parameters. 
""" diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 66ad018ad..aa2390afc 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -43,6 +43,314 @@ def test___eq__(self): self.assertNotEqual(udf, wrong_type) +class Test__AbstractQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameterType + + return _AbstractQueryParameterType + + @classmethod + def _make_one(cls, *args, **kw): + return cls._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param_type = self._make_one() + with self.assertRaises(NotImplementedError): + param_type.to_api_repr() + + +class Test_ScalarQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameterType + + return ScalarQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + klass = self._get_target_class() + result = klass.from_api_repr({"type": "BOOLEAN"}) + self.assertEqual(result._type, "BOOLEAN") + self.assertIsNone(result.name) + self.assertIsNone(result.description) + + def test_to_api_repr(self): + param_type = self._make_one("BYTES", name="foo", description="bar") + result = param_type.to_api_repr() + self.assertEqual(result, {"type": "BYTES"}) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BYTES") + self.assertEqual(repr(param_type), "ScalarQueryParameterType('BYTES')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("BYTES", name="foo", description="this is foo") + self.assertEqual( + repr(param_type), + "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", + ) + + +class 
Test_ArrayQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameterType + + return ArrayQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + from google.cloud.bigquery.query import StructQueryParameterType + + api_resource = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + item_type = result._array_type + self.assertIsInstance(item_type, StructQueryParameterType) + + self.assertIsNone(item_type.name) + self.assertIsNone(item_type.description) + + subtype = item_type._subtypes[0] + self.assertEqual(subtype.name, "weight") + self.assertEqual(subtype.description, "in kg") + self.assertEqual(subtype._type, "INTEGER") + + subtype = item_type._subtypes[1] + self.assertEqual(subtype.name, "last_name") + self.assertIsNone(subtype.description) + self.assertEqual(subtype._type, "STRING") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + array_item_type = StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="weight", description="in kg"), + ScalarQueryParameterType("STRING", name="last_name"), + ) + param_type = self._make_one(array_item_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": 
"STRING"}}, + ], + }, + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BOOLEAN") + self.assertEqual(repr(param_type), "ArrayQueryParameterType('BOOLEAN')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("INT64", name="bar", description="this is bar") + self.assertEqual( + repr(param_type), + "ArrayQueryParameterType('INT64', name='bar', description='this is bar')", + ) + + +class Test_StructQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameterType + + return StructQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + from google.cloud.bigquery.query import ArrayQueryParameterType + from google.cloud.bigquery.query import ScalarQueryParameterType + + api_resource = { + "type": "STRUCT", + "structTypes": [ + { + "name": "age", + "type": {"type": "INTEGER"}, + "description": "in years", + }, + { + "name": "aliases", + "type": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + }, + { + "description": "a nested struct", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + self.assertEqual(len(result._subtypes), 3) + + subtype = result._subtypes[0] + self.assertIsInstance(subtype, ScalarQueryParameterType) + self.assertEqual(subtype.name, "age") + self.assertEqual(subtype.description, "in years") + + subtype = result._subtypes[1] + self.assertIsInstance(subtype, ArrayQueryParameterType) + self.assertEqual(subtype.name, "aliases") + self.assertIsNone(subtype.description) + 
self.assertIsInstance(subtype._array_type, ScalarQueryParameterType) + self.assertEqual(subtype._array_type._type, "STRING") + + subtype = result._subtypes[2] + self.assertIsInstance(subtype, self._get_target_class()) + self.assertIsNone(subtype.name) + self.assertEqual(subtype.description, "a nested struct") + + date_field = subtype._subtypes[0] + self.assertEqual(date_field._type, "DATE") + self.assertEqual(date_field.name, "nested_date") + self.assertIsNone(date_field.description) + + bool_field = subtype._subtypes[1] + self.assertEqual(bool_field._type, "BOOLEAN") + self.assertIsNone(bool_field.name) + self.assertEqual(bool_field.description, "nested bool field") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + date_type = ScalarQueryParameterType("DATE", name="day_of_birth") + param_type = self._make_one(int_type, date_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + {"name": "day_of_birth", "type": {"type": "DATE"}}, + ], + } + self.assertEqual(result, expected_result) + + def test_to_api_repr_nested(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + struct_class = self._get_target_class() + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + nested_struct_type = struct_class( + ScalarQueryParameterType("DATE", name="nested_date"), + ScalarQueryParameterType("BOOLEAN", description="nested bool field"), + name="nested", + ) + param_type = self._make_one( + int_type, nested_struct_type, name="foo", description="bar" + ) + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + { + "name": "nested", + "type": { + "type": "STRUCT", + 
"structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), ScalarQueryParameterType("STRING") + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING')" + ")" + ) + self.assertEqual(repr(param_type), expected) + + def test_repr_all_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), + ScalarQueryParameterType("STRING"), + name="data_record", + description="this is it", + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING'), " + "name='data_record', description='this is it'" + ")" + ) + self.assertEqual(repr(param_type), expected) + + class Test__AbstractQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): From e822aebd05f99416ff7e8d3c55454f4a2a8ad0aa Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 3 Feb 2021 14:44:27 +0100 Subject: [PATCH 03/10] Use query parameter types with ArrayQueryParameter --- google/cloud/bigquery/query.py | 87 +++++++++++++++++++--------------- tests/unit/test_query.py | 56 ++++++++++++++++++---- 2 files changed, 96 insertions(+), 47 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 5fe3b1628..0d03c7b7a 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -404,58 +404,48 @@ class ArrayQueryParameter(_AbstractQueryParameter): Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). 
- array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, - or `'STRUCT'`/`'RECORD'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, + `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. values (List[appropriate type]): The parameter array values. - - struct_item_type (Optional[google.cloud.bigquery.query.StructQueryParameter]): - The type of array elements. The argument is generally not used, but is - required if ``array_type`` is ``'STRUCT'``/``'RECORD'`` and ``values`` - is empty. - This is because the backend requires detailed type information about - array elements, but that cannot be determined for ``'STRUCT'`` items - if there are no elements in the array. """ - def __init__(self, name, array_type, values, struct_item_type=None): + def __init__(self, name, array_type, values): self.name = name - self.array_type = array_type self.values = values - if not values and array_type in {"RECORD", "STRUCT"}: - if struct_item_type is None: - raise ValueError("Missing struct item type info for an empty array.") - self._struct_item_type_api = struct_item_type.to_api_repr()["parameterType"] - else: - self._struct_item_type_api = None # won't be used + if isinstance(array_type, str): + if not values and array_type in {"RECORD", "STRUCT"}: + raise ValueError( + "Missing detailed struct item type info for an empty array, " + "please provide a StructQueryParameterType instance." 
+ ) + self.array_type = array_type @classmethod - def positional(cls, array_type, values, struct_item_type=None): + def positional(cls, array_type, values): """Factory for positional parameters. Args: - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, - or `'STRUCT'`/`'RECORD'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, + `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. values (List[appropriate type]): The parameter array values. - struct_item_type (Optional[google.cloud.bigquery.query.StructQueryParameter]): - The type of array elements. The argument is generally not used, but is - required if ``array_type`` is ``'STRUCT'``/``'RECORD'`` and ``values`` - is empty. - This is because the backend requires detailed type information about - array elements, but that cannot be determined for ``'STRUCT'`` items - if there are no elements in the array. 
- Returns: google.cloud.bigquery.query.ArrayQueryParameter: Instance without name """ - return cls(None, array_type, values, struct_item_type=struct_item_type) + return cls(None, array_type, values) @classmethod def _from_api_repr_struct(cls, resource): @@ -508,26 +498,38 @@ def to_api_repr(self): Dict: JSON mapping """ values = self.values - if self.array_type == "RECORD" or self.array_type == "STRUCT": + + if self.array_type in {"RECORD", "STRUCT"} or isinstance( + self.array_type, StructQueryParameterType + ): reprs = [value.to_api_repr() for value in values] a_values = [repr_["parameterValue"] for repr_ in reprs] if reprs: a_type = reprs[0]["parameterType"] else: - a_type = self._struct_item_type_api + # The constructor disallows STRUCT/RECORD type when empty values. + assert isinstance(self.array_type, StructQueryParameterType) + a_type = self.array_type.to_api_repr() else: - a_type = {"type": self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + # Scalar array item type. + if isinstance(self.array_type, str): + a_type = {"type": self.array_type} + else: + a_type = self.array_type.to_api_repr() + + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) if converter is not None: values = [converter(value) for value in values] a_values = [{"value": value} for value in values] + resource = { "parameterType": {"type": "ARRAY", "arrayType": a_type}, "parameterValue": {"arrayValues": a_values}, } if self.name is not None: resource["name"] = self.name + return resource def _key(self): @@ -538,7 +540,14 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. 
""" - return (self.name, self.array_type.upper(), self.values) + if isinstance(self.array_type, str): + item_type = self.array_type + elif isinstance(self.array_type, ScalarQueryParameterType): + item_type = self.array_type._type + else: + item_type = "STRUCT" + + return (self.name, item_type.upper(), self.values) def __eq__(self, other): if not isinstance(other, ArrayQueryParameter): diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index aa2390afc..4b91664aa 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -769,6 +769,19 @@ def test_to_api_repr_wo_name(self): param = klass.positional(array_type="INT64", values=[1, 2]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_array_type_as_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + EXPECTED = { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "BOOLEAN"}}, + "parameterValue": {"arrayValues": [{"value": "true"}, {"value": "false"}]}, + } + klass = self._get_target_class() + param = klass.positional( + array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False], + ) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): EXPECTED = { "parameterType": {"type": "ARRAY", "arrayType": {"type": "UNKNOWN"}}, @@ -806,7 +819,8 @@ def test_to_api_repr_w_record_type(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_empty_array_of_records_type(self): - from google.cloud.bigquery.query import StructQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType EXPECTED = { "parameterType": { @@ -821,13 +835,12 @@ def test_to_api_repr_w_empty_array_of_records_type(self): }, "parameterValue": {"arrayValues": []}, } - one = _make_subparam("foo", "STRING", None) - another = _make_subparam("bar", "INT64", None) - struct = StructQueryParameter.positional(one, another) 
- klass = self._get_target_class() - param = klass.positional( - array_type="RECORD", values=[], struct_item_type=struct + item_type = StructQueryParameterType( + ScalarQueryParameterType("STRING", name="foo"), + ScalarQueryParameterType("INT64", name="bar"), ) + klass = self._get_target_class() + param = klass.positional(array_type=item_type, values=[]) self.assertEqual(param.to_api_repr(), EXPECTED) def test___eq___wrong_type(self): @@ -874,11 +887,38 @@ def test___ne___different_values(self): field2 = self._make_one("test", "INT64", [12]) self.assertNotEqual(field1, field2) - def test___repr__(self): + def test___repr__array_type_str(self): field1 = self._make_one("field1", "STRING", ["value"]) expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" self.assertEqual(repr(field1), expected) + def test___repr__array_type_scalar_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_items = self._make_one( + "int_items", ScalarQueryParameterType("INTEGER"), [64] + ) + expected = "ArrayQueryParameter('int_items', 'INTEGER', [64])" + self.assertEqual(repr(int_items), expected) + + def test___repr__array_type_struct_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + struct_items = self._make_one( + "struct_items", + StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="age"), + ScalarQueryParameterType("STRING", name="last_name"), + ), + [{"age": 18, "last_name": "Doe"}], + ) + expected = ( + "ArrayQueryParameter('struct_items', 'STRUCT', " + "[{'age': 18, 'last_name': 'Doe'}])" + ) + self.assertEqual(repr(struct_items), expected) + class Test_StructQueryParameter(unittest.TestCase): @staticmethod From 38d80b83f8a25463ed1dbf98ae9fc76fa322d1a4 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 4 Feb 2021 13:33:24 +0100 Subject: [PATCH 04/10] Adjust system test to changed ArrayQueryParameter --- 
tests/system/test_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 1af3c7b3d..7ae615864 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2140,7 +2140,9 @@ def test_query_w_query_params(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter + from google.cloud.bigquery.query import StructQueryParameterType question = "What is the answer to life, the universe, and everything?" question_param = ScalarQueryParameter( @@ -2197,12 +2199,10 @@ def test_query_w_query_params(self): ) empty_struct_array_param = ArrayQueryParameter( name="empty_array_param", - array_type="RECORD", values=[], - struct_item_type=StructQueryParameter( - None, - ScalarQueryParameter(name="foo", type_="INT64", value=None), - ScalarQueryParameter(name="bar", type_="STRING", value=None), + array_type=StructQueryParameterType( + ScalarQueryParameterType(name="foo", type_="INT64"), + ScalarQueryParameterType(name="bar", type_="STRING"), ), ) hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) From 62a31e7c5614c3ec88bc475d768d5b83ef877b49 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 20 Feb 2021 10:09:21 +0100 Subject: [PATCH 05/10] Clarify a comment about an assertion Co-authored-by: Tim Swast --- google/cloud/bigquery/query.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 0d03c7b7a..1abd3a3b1 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -508,7 +508,9 @@ def to_api_repr(self): if reprs: a_type = reprs[0]["parameterType"] else: - # The constructor disallows 
STRUCT/RECORD type when empty values. + # This assertion always evaluates to True because the + # constructor disallows STRUCT/RECORD type defined as a + # string with empty values. assert isinstance(self.array_type, StructQueryParameterType) a_type = self.array_type.to_api_repr() else: From bf7cf6f683bda1dd26088efa526e0924c25ed652 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 20 Feb 2021 10:46:01 +0100 Subject: [PATCH 06/10] Clarify when name/descr. is omitted from API repr --- google/cloud/bigquery/query.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 1abd3a3b1..a4c5e6d71 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -114,6 +114,9 @@ def to_api_repr(self): Returns: Dict: JSON mapping """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibility to use these two + # attributes in the API representation when needed. Here we omit them. return {"type": self._type} def __repr__(self): @@ -171,6 +174,9 @@ def to_api_repr(self): Returns: Dict: JSON mapping """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibility to use these two + # attributes in the API representation when needed. Here we omit them. 
return { "type": "ARRAY", "arrayType": self._array_type.to_api_repr(), From 8255028f2c40af48a5ab417704f913dd5dcc5e4d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 20 Feb 2021 11:11:21 +0100 Subject: [PATCH 07/10] Rename subtypes to fields --- google/cloud/bigquery/query.py | 40 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index a4c5e6d71..f78e6d0e4 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -196,7 +196,7 @@ class StructQueryParameterType(_AbstractQueryParameterType): """Type representation for struct query parameters. Args: - subtypes (Iterable[Union[ \ + fields (Iterable[Union[ \ ArrayQueryParameterType, ScalarQueryParameterType, StructQueryParameterType \ ]]): An non-empty iterable describing the struct's field types. @@ -208,8 +208,8 @@ class StructQueryParameterType(_AbstractQueryParameterType): one of the subfields in ``StructQueryParameterType`` instance. 
""" - def __init__(self, *subtypes, name=None, description=None): - self._subtypes = [type_ for type_ in subtypes] # make a shallow copy + def __init__(self, *fields, name=None, description=None): + self._fields = [type_ for type_ in fields] # make a shallow copy self.name = name self.description = description @@ -223,10 +223,10 @@ def from_api_repr(cls, resource): Returns: google.cloud.bigquery.query.StructQueryParameterType: Instance """ - subtypes = [] + fields = [] - for struct_subtype in resource["structTypes"]: - type_repr = struct_subtype["type"] + for struct_field in resource["structTypes"]: + type_repr = struct_field["type"] if type_repr["type"] in {"STRUCT", "RECORD"}: klass = StructQueryParameterType elif type_repr["type"] == "ARRAY": @@ -235,11 +235,11 @@ def from_api_repr(cls, resource): klass = ScalarQueryParameterType type_instance = klass.from_api_repr(type_repr) - type_instance.name = struct_subtype.get("name") - type_instance.description = struct_subtype.get("description") - subtypes.append(type_instance) + type_instance.name = struct_field.get("name") + type_instance.description = struct_field.get("description") + fields.append(type_instance) - return cls(*subtypes) + return cls(*fields) def to_api_repr(self): """Construct JSON API representation for the parameter type. 
@@ -247,20 +247,20 @@ def to_api_repr(self): Returns: Dict: JSON mapping """ - struct_types = [] + fields = [] - for subtype in self._subtypes: - item = {"type": subtype.to_api_repr()} - if subtype.name is not None: - item["name"] = subtype.name - if subtype.description is not None: - item["description"] = subtype.description + for field in self._fields: + item = {"type": field.to_api_repr()} + if field.name is not None: + item["name"] = field.name + if field.description is not None: + item["description"] = field.description - struct_types.append(item) + fields.append(item) return { "type": "STRUCT", - "structTypes": struct_types, + "structTypes": fields, } def __repr__(self): @@ -270,7 +270,7 @@ def __repr__(self): if self.description is not None else "" ) - items = ", ".join(repr(subtype) for subtype in self._subtypes) + items = ", ".join(repr(field) for field in self._fields) return f"{self.__class__.__name__}({items}{name}{description})" From 4411f6d2481aa17944b20e9109ca0256653897d0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 20 Feb 2021 11:26:00 +0100 Subject: [PATCH 08/10] Add fields property to StructQueryParameterType --- google/cloud/bigquery/query.py | 6 +++- tests/unit/test_query.py | 58 +++++++++++++++++----------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f78e6d0e4..5908eea22 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -209,10 +209,14 @@ class StructQueryParameterType(_AbstractQueryParameterType): """ def __init__(self, *fields, name=None, description=None): - self._fields = [type_ for type_ in fields] # make a shallow copy + self._fields = fields # fields is a tuple (immutable), no shallow copy needed self.name = name self.description = description + @property + def fields(self): + return self._fields # no copy needed, self._fields is an immutable sequence + @classmethod def from_api_repr(cls, resource): 
"""Factory: construct parameter type from JSON resource. diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 4b91664aa..11ec948c0 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -138,15 +138,15 @@ def test_from_api_repr(self): self.assertIsNone(item_type.name) self.assertIsNone(item_type.description) - subtype = item_type._subtypes[0] - self.assertEqual(subtype.name, "weight") - self.assertEqual(subtype.description, "in kg") - self.assertEqual(subtype._type, "INTEGER") + field = item_type.fields[0] + self.assertEqual(field.name, "weight") + self.assertEqual(field.description, "in kg") + self.assertEqual(field._type, "INTEGER") - subtype = item_type._subtypes[1] - self.assertEqual(subtype.name, "last_name") - self.assertIsNone(subtype.description) - self.assertEqual(subtype._type, "STRING") + field = item_type.fields[1] + self.assertEqual(field.name, "last_name") + self.assertIsNone(field.description) + self.assertEqual(field._type, "STRING") def test_to_api_repr(self): from google.cloud.bigquery.query import ScalarQueryParameterType @@ -235,31 +235,31 @@ def test_from_api_repr(self): self.assertIsNone(result.name) self.assertIsNone(result.description) - self.assertEqual(len(result._subtypes), 3) - - subtype = result._subtypes[0] - self.assertIsInstance(subtype, ScalarQueryParameterType) - self.assertEqual(subtype.name, "age") - self.assertEqual(subtype.description, "in years") - - subtype = result._subtypes[1] - self.assertIsInstance(subtype, ArrayQueryParameterType) - self.assertEqual(subtype.name, "aliases") - self.assertIsNone(subtype.description) - self.assertIsInstance(subtype._array_type, ScalarQueryParameterType) - self.assertEqual(subtype._array_type._type, "STRING") - - subtype = result._subtypes[2] - self.assertIsInstance(subtype, self._get_target_class()) - self.assertIsNone(subtype.name) - self.assertEqual(subtype.description, "a nested struct") - - date_field = subtype._subtypes[0] + 
self.assertEqual(len(result.fields), 3) + + field = result.fields[0] + self.assertIsInstance(field, ScalarQueryParameterType) + self.assertEqual(field.name, "age") + self.assertEqual(field.description, "in years") + + field = result.fields[1] + self.assertIsInstance(field, ArrayQueryParameterType) + self.assertEqual(field.name, "aliases") + self.assertIsNone(field.description) + self.assertIsInstance(field._array_type, ScalarQueryParameterType) + self.assertEqual(field._array_type._type, "STRING") + + field = result.fields[2] + self.assertIsInstance(field, self._get_target_class()) + self.assertIsNone(field.name) + self.assertEqual(field.description, "a nested struct") + + date_field = field.fields[0] self.assertEqual(date_field._type, "DATE") self.assertEqual(date_field.name, "nested_date") self.assertIsNone(date_field.description) - bool_field = subtype._subtypes[1] + bool_field = field.fields[1] self.assertEqual(bool_field._type, "BOOLEAN") self.assertIsNone(bool_field.name) self.assertEqual(bool_field.description, "nested bool field") From a02c3ee50db7f1f3868fdf4136d7a8f2e94111ed Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 20 Feb 2021 11:35:14 +0100 Subject: [PATCH 09/10] Add a check for empty struct fields --- google/cloud/bigquery/query.py | 3 +++ tests/unit/test_query.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 5908eea22..1a8f4b3e6 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -209,6 +209,9 @@ class StructQueryParameterType(_AbstractQueryParameterType): """ def __init__(self, *fields, name=None, description=None): + if not fields: + raise ValueError("Struct type must have at least one field defined.") + self._fields = fields # fields is a tuple (immutable), no shallow copy needed self.name = name self.description = description diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 11ec948c0..d60fdc677 100644 --- 
a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -198,6 +198,10 @@ def _get_target_class(): def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) + def test_raises_error_without_any_fields(self): + with self.assertRaisesRegex(ValueError, ".*at least one field.*"): + self._make_one() + def test_from_api_repr(self): from google.cloud.bigquery.query import ArrayQueryParameterType from google.cloud.bigquery.query import ScalarQueryParameterType From 058aecf61d44265973ee35ec174592a71ac300af Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 20 Feb 2021 12:46:44 +0100 Subject: [PATCH 10/10] Define scalar SQL parameter types as type objects --- google/cloud/bigquery/enums.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e353b3132..b378f091b 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -18,6 +18,7 @@ import itertools from google.cloud.bigquery_v2 import types as gapic_types +from google.cloud.bigquery.query import ScalarQueryParameterType class Compression(object): @@ -215,6 +216,26 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" +class SqlParameterScalarTypes: + """Supported scalar SQL query parameter types as type objects.""" + + STRING = ScalarQueryParameterType("STRING") + BYTES = ScalarQueryParameterType("BYTES") + INTEGER = ScalarQueryParameterType("INT64") + INT64 = ScalarQueryParameterType("INT64") + FLOAT = ScalarQueryParameterType("FLOAT64") + FLOAT64 = ScalarQueryParameterType("FLOAT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + BOOLEAN = ScalarQueryParameterType("BOOL") + BOOL = ScalarQueryParameterType("BOOL") + GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") + DATE = ScalarQueryParameterType("DATE") + TIME = ScalarQueryParameterType("TIME") + DATETIME = 
ScalarQueryParameterType("DATETIME") + + class WriteDisposition(object): """Specifies the action that occurs if destination table already exists.