Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
feat: Extended DB API parameter syntax to optionally provide paramete…
…r types (#626)

* Added explicit type documentation.

* Extended query-parameter system for specifying parameter types.

* Serialize non-floats (e.g. Decimals) using `float` in FLOAT64 parameters.

Co-authored-by: Tim Swast <swast@google.com>

* De-reference aliases in SqlParameterScalarTypes when checking types

Co-authored-by: Tim Swast <swast@google.com>
  • Loading branch information
jimfulton and tswast committed Apr 29, 2021
1 parent c085186 commit 8bcf397
Show file tree
Hide file tree
Showing 8 changed files with 396 additions and 60 deletions.
37 changes: 37 additions & 0 deletions docs/dbapi.rst
Expand Up @@ -4,3 +4,40 @@ DB-API Reference
.. automodule:: google.cloud.bigquery.dbapi
:members:
:show-inheritance:


DB-API Query-Parameter Syntax
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The BigQuery DB-API uses the `qmark` `parameter style
<https://www.python.org/dev/peps/pep-0249/#paramstyle>`_ for
unnamed/positional parameters and the `pyformat` parameter style for
named parameters.

An example of a query using unnamed parameters::

insert into people (name, income) values (?, ?)

and using named parameters::

insert into people (name, income) values (%(name)s, %(income)s)

Providing explicit type information
-----------------------------------

BigQuery requires type information for parameters. The BigQuery
DB-API can usually determine parameter types for parameters based on
provided values. Sometimes, however, types can't be determined (for
example when `None` is passed) or are determined incorrectly (for
example when passing a floating-point value to a numeric column).

The BigQuery DB-API provides an extended parameter syntax. For named
parameters, a BigQuery type is provided after the name separated by a
colon, as in::

insert into people (name, income) values (%(name:string)s, %(income:numeric)s)

For unnamed parameters, use the named syntax with a type, but no
name, as in::

insert into people (name, income) values (%(:string)s, %(:numeric)s)
2 changes: 1 addition & 1 deletion google/cloud/bigquery/_helpers.py
Expand Up @@ -275,7 +275,7 @@ def _int_to_json(value):

def _float_to_json(value):
"""Coerce 'value' to an JSON-compatible representation."""
return value
return value if value is None else float(value)


def _decimal_to_json(value):
Expand Down
107 changes: 73 additions & 34 deletions google/cloud/bigquery/dbapi/_helpers.py
Expand Up @@ -20,15 +20,36 @@
import numbers

from google.cloud import bigquery
from google.cloud.bigquery import table
from google.cloud.bigquery import table, enums
from google.cloud.bigquery.dbapi import exceptions


_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28")
_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28")


def scalar_to_query_parameter(value, name=None):
def _parameter_type(name, value, query_parameter_type=None, value_doc=""):
    """Work out the BigQuery scalar type to use for a query parameter.

    Args:
        name: Name of the parameter; only used in error messages.
        value: The parameter value, used to infer a type when no explicit
            type is given.
        query_parameter_type (Optional[str]):
            Explicitly requested type name. It is upper-cased and looked up
            as an attribute of ``enums.SqlParameterScalarTypes``.
        value_doc (str):
            Text inserted before the word "value" in the "unexpected type"
            error message (for example ``"array element "``).

    Returns:
        The ``_type`` attribute of the matching
        ``enums.SqlParameterScalarTypes`` member when an explicit type is
        given (presumably the canonical type name with aliases resolved --
        confirm in ``enums``); otherwise the result of
        ``bigquery_scalar_type(value)``.

    Raises:
        google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
            if the explicit type is not a known scalar type, or if no type
            can be inferred from ``value``.
    """
    if not query_parameter_type:
        # No explicit type was requested: fall back to inferring it from
        # the value itself.
        inferred = bigquery_scalar_type(value)
        if inferred is None:
            raise exceptions.ProgrammingError(
                f"Encountered parameter {name} with "
                f"{value_doc} value {value} of unexpected type."
            )
        return inferred

    try:
        scalar_type = getattr(
            enums.SqlParameterScalarTypes, query_parameter_type.upper()
        )
        return scalar_type._type
    except AttributeError:
        raise exceptions.ProgrammingError(
            f"The given parameter type, {query_parameter_type},"
            f" for {name} is not a valid BigQuery scalar type."
        )


def scalar_to_query_parameter(value, name=None, query_parameter_type=None):
"""Convert a scalar value into a query parameter.
Args:
Expand All @@ -37,6 +58,7 @@ def scalar_to_query_parameter(value, name=None):
name (str):
(Optional) Name of the query parameter.
query_parameter_type (Optional[str]): Given type for the parameter.
Returns:
google.cloud.bigquery.ScalarQueryParameter:
Expand All @@ -47,24 +69,19 @@ def scalar_to_query_parameter(value, name=None):
google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
if the type cannot be determined.
"""
parameter_type = bigquery_scalar_type(value)

if parameter_type is None:
raise exceptions.ProgrammingError(
"encountered parameter {} with value {} of unexpected type".format(
name, value
)
)
return bigquery.ScalarQueryParameter(name, parameter_type, value)
return bigquery.ScalarQueryParameter(
name, _parameter_type(name, value, query_parameter_type), value
)


def array_to_query_parameter(value, name=None):
def array_to_query_parameter(value, name=None, query_parameter_type=None):
"""Convert an array-like value into a query parameter.
Args:
value (Sequence[Any]): The elements of the array (should not be a
string-like Sequence).
name (Optional[str]): Name of the query parameter.
query_parameter_type (Optional[str]): Given type for the parameter.
Returns:
A query parameter corresponding with the type and value of the plain
Expand All @@ -80,53 +97,58 @@ def array_to_query_parameter(value, name=None):
"not string-like.".format(name)
)

if not value:
if query_parameter_type or value:
array_type = _parameter_type(
name,
value[0] if value else None,
query_parameter_type,
value_doc="array element ",
)
else:
raise exceptions.ProgrammingError(
"Encountered an empty array-like value of parameter {}, cannot "
"determine array elements type.".format(name)
)

# Assume that all elements are of the same type, and let the backend handle
# any type incompatibilities among the array elements
array_type = bigquery_scalar_type(value[0])
if array_type is None:
raise exceptions.ProgrammingError(
"Encountered unexpected first array element of parameter {}, "
"cannot determine array elements type.".format(name)
)

return bigquery.ArrayQueryParameter(name, array_type, value)


def to_query_parameters_list(parameters, parameter_types=None):
    """Converts a sequence of parameter values into query parameters.

    Args:
        parameters (Sequence[Any]): Sequence of query parameter values.
        parameter_types (Optional[Sequence[Optional[str]]]):
            A list of parameter types, one for each parameter.
            Unknown types are provided as None. When omitted, every
            parameter type is treated as unknown and inferred from its
            value.

    Returns:
        List[google.cloud.bigquery.query._AbstractQueryParameter]:
            A list of query parameters.

    Raises:
        NotImplementedError:
            if a parameter value is a mapping (STRUCT-like values are not
            supported).
        google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
            if the number of parameter types differs from the number of
            parameter values.
    """
    if parameter_types is None:
        parameter_types = [None] * len(parameters)
    elif len(parameter_types) != len(parameters):
        # zip() would silently drop the surplus entries, so a value could be
        # discarded without any error being reported.
        raise exceptions.ProgrammingError(
            "Expected {} parameter types for {} parameters, got {}.".format(
                len(parameters), len(parameters), len(parameter_types)
            )
        )

    result = []

    for value, type_ in zip(parameters, parameter_types):
        if isinstance(value, collections_abc.Mapping):
            raise NotImplementedError("STRUCT-like parameter values are not supported.")
        elif array_like(value):
            param = array_to_query_parameter(value, None, type_)
        else:
            param = scalar_to_query_parameter(value, None, type_)

        result.append(param)

    return result


def to_query_parameters_dict(parameters):
def to_query_parameters_dict(parameters, query_parameter_types):
"""Converts a dictionary of parameter values into query parameters.
Args:
parameters (Mapping[str, Any]): Dictionary of query parameter values.
parameter_types:
A dictionary of parameter types. It needn't have a key for each
parameter.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
Expand All @@ -140,21 +162,38 @@ def to_query_parameters_dict(parameters):
"STRUCT-like parameter values are not supported "
"(parameter {}).".format(name)
)
elif array_like(value):
param = array_to_query_parameter(value, name=name)
else:
param = scalar_to_query_parameter(value, name=name)
query_parameter_type = query_parameter_types.get(name)
if array_like(value):
param = array_to_query_parameter(
value, name=name, query_parameter_type=query_parameter_type
)
else:
param = scalar_to_query_parameter(
value, name=name, query_parameter_type=query_parameter_type,
)

result.append(param)

return result


def to_query_parameters(parameters):
def to_query_parameters(parameters, parameter_types):
"""Converts DB-API parameter values into query parameters.
Args:
parameters (Union[Mapping[str, Any], Sequence[Any]]):
A dictionary or sequence of query parameter values.
parameter_types (Union[Mapping[str, str], Sequence[str]]):
A dictionary or list of parameter types.
If parameters is a mapping, then this must be a dictionary
of parameter types. It needn't have a key for each
parameter.
If parameters is a sequence, then this must be a list of
parameter types, one for each paramater. Unknown types
are provided as None.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
Expand All @@ -164,9 +203,9 @@ def to_query_parameters(parameters):
return []

if isinstance(parameters, collections_abc.Mapping):
return to_query_parameters_dict(parameters)

return to_query_parameters_list(parameters)
return to_query_parameters_dict(parameters, parameter_types)
else:
return to_query_parameters_list(parameters, parameter_types)


def bigquery_scalar_type(value):
Expand Down
93 changes: 85 additions & 8 deletions google/cloud/bigquery/dbapi/cursor.py
Expand Up @@ -18,6 +18,7 @@
from collections import abc as collections_abc
import copy
import logging
import re

try:
from google.cloud.bigquery_storage import ArrowSerializationOptions
Expand Down Expand Up @@ -161,6 +162,14 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
job_config (google.cloud.bigquery.job.QueryJobConfig):
(Optional) Extra configuration options for the query job.
"""
formatted_operation, parameter_types = _format_operation(operation, parameters)
self._execute(
formatted_operation, parameters, job_id, job_config, parameter_types
)

def _execute(
self, formatted_operation, parameters, job_id, job_config, parameter_types
):
self._query_data = None
self._query_job = None
client = self.connection._client
Expand All @@ -169,8 +178,7 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
# query parameters was not one of the standard options. Convert both
# the query and the parameters to the format expected by the client
# libraries.
formatted_operation = _format_operation(operation, parameters=parameters)
query_parameters = _helpers.to_query_parameters(parameters)
query_parameters = _helpers.to_query_parameters(parameters, parameter_types)

if client._default_query_job_config:
if job_config:
Expand Down Expand Up @@ -209,8 +217,19 @@ def executemany(self, operation, seq_of_parameters):
seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]):
Sequence of many sets of parameter values.
"""
for parameters in seq_of_parameters:
self.execute(operation, parameters)
if seq_of_parameters:
# There's no reason to format the line more than once, as
# the operation only barely depends on the parameters. So
# we just use the first set of parameters. If there are
# different numbers or types of parameters, we'll error
# anyway.
formatted_operation, parameter_types = _format_operation(
operation, seq_of_parameters[0]
)
for parameters in seq_of_parameters:
self._execute(
formatted_operation, parameters, None, None, parameter_types
)

def _try_fetch(self, size=None):
"""Try to start fetching data, if not yet started.
Expand Down Expand Up @@ -427,7 +446,7 @@ def _format_operation_dict(operation, parameters):
raise exceptions.ProgrammingError(exc)


def _format_operation(operation, parameters=None):
def _format_operation(operation, parameters):
"""Formats parameters in operation in way BigQuery expects.
Args:
Expand All @@ -445,9 +464,67 @@ def _format_operation(operation, parameters=None):
``parameters`` argument.
"""
if parameters is None or len(parameters) == 0:
return operation.replace("%%", "%") # Still do percent de-escaping.
return operation.replace("%%", "%"), None # Still do percent de-escaping.

operation, parameter_types = _extract_types(operation)
if parameter_types is None:
raise exceptions.ProgrammingError(
f"Parameters were provided, but {repr(operation)} has no placeholders."
)

if isinstance(parameters, collections_abc.Mapping):
return _format_operation_dict(operation, parameters)
return _format_operation_dict(operation, parameters), parameter_types

return _format_operation_list(operation, parameters), parameter_types


def _extract_types(
operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub
):
"""Remove type information from parameter placeholders.
For every parameter of the form %(name:type)s, replace with %(name)s and add the
item name->type to dict that's returned.
Returns operation without type information and a dictionary of names and types.
"""
parameter_types = None

def repl(m):
nonlocal parameter_types
prefix, name, type_ = m.groups()
if len(prefix) % 2:
# The prefix has an odd number of %s, the last of which
# escapes the % we're looking for, so we don't want to
# change anything.
return m.group(0)

try:
if name:
if not parameter_types:
parameter_types = {}
if type_:
if name in parameter_types:
if type_ != parameter_types[name]:
raise exceptions.ProgrammingError(
f"Conflicting types for {name}: "
f"{parameter_types[name]} and {type_}."
)
else:
parameter_types[name] = type_
else:
if not isinstance(parameter_types, dict):
raise TypeError()

return f"{prefix}%({name})s"
else:
if parameter_types is None:
parameter_types = []
parameter_types.append(type_)
return f"{prefix}%s"
except (AttributeError, TypeError):
raise exceptions.ProgrammingError(
f"{repr(operation)} mixes named and unamed parameters."
)

return _format_operation_list(operation, parameters)
return extra_type_sub(repl, operation), parameter_types

0 comments on commit 8bcf397

Please sign in to comment.