Skip to content

Commit

Permalink
chore: unit test coverage and additional refactoring for `spanner_dba…
Browse files Browse the repository at this point in the history
…pi` (#532)

Refactoring and testing changes.
  • Loading branch information
mf2199 committed Oct 26, 2020
1 parent 2f2cd86 commit 927e178
Show file tree
Hide file tree
Showing 20 changed files with 1,712 additions and 825 deletions.
125 changes: 35 additions & 90 deletions google/cloud/spanner_dbapi/__init__.py
Expand Up @@ -6,37 +6,39 @@

"""Connection-based DB API for Cloud Spanner."""

from google.cloud import spanner_v1

from .connection import Connection
from .exceptions import (
DatabaseError,
DataError,
Error,
IntegrityError,
InterfaceError,
InternalError,
NotSupportedError,
OperationalError,
ProgrammingError,
Warning,
)
from .parse_utils import get_param_types
from .types import (
BINARY,
DATETIME,
NUMBER,
ROWID,
STRING,
Binary,
Date,
DateFromTicks,
Time,
TimeFromTicks,
Timestamp,
TimestampFromTicks,
)
from .version import google_client_info
from google.cloud.spanner_dbapi.connection import Connection
from google.cloud.spanner_dbapi.connection import connect

from google.cloud.spanner_dbapi.cursor import Cursor

from google.cloud.spanner_dbapi.exceptions import DatabaseError
from google.cloud.spanner_dbapi.exceptions import DataError
from google.cloud.spanner_dbapi.exceptions import Error
from google.cloud.spanner_dbapi.exceptions import IntegrityError
from google.cloud.spanner_dbapi.exceptions import InterfaceError
from google.cloud.spanner_dbapi.exceptions import InternalError
from google.cloud.spanner_dbapi.exceptions import NotSupportedError
from google.cloud.spanner_dbapi.exceptions import OperationalError
from google.cloud.spanner_dbapi.exceptions import ProgrammingError
from google.cloud.spanner_dbapi.exceptions import Warning

from google.cloud.spanner_dbapi.parse_utils import get_param_types

from google.cloud.spanner_dbapi.types import BINARY
from google.cloud.spanner_dbapi.types import DATETIME
from google.cloud.spanner_dbapi.types import NUMBER
from google.cloud.spanner_dbapi.types import ROWID
from google.cloud.spanner_dbapi.types import STRING
from google.cloud.spanner_dbapi.types import Binary
from google.cloud.spanner_dbapi.types import Date
from google.cloud.spanner_dbapi.types import DateFromTicks
from google.cloud.spanner_dbapi.types import Time
from google.cloud.spanner_dbapi.types import TimeFromTicks
from google.cloud.spanner_dbapi.types import Timestamp
from google.cloud.spanner_dbapi.types import TimestampStr
from google.cloud.spanner_dbapi.types import TimestampFromTicks

from google.cloud.spanner_dbapi.version import DEFAULT_USER_AGENT

apilevel = "2.0" # supports DB-API 2.0 level.
paramstyle = "format" # ANSI C printf format codes, e.g. ...WHERE name=%s.
Expand All @@ -48,66 +50,10 @@
threadsafety = 1


def connect(
    instance_id,
    database_id,
    project=None,
    credentials=None,
    pool=None,
    user_agent=None,
):
    """Open a connection to an existing Cloud Spanner database.

    A Spanner client is built from the given project, credentials and user
    agent; the instance and the database are then looked up and checked for
    existence before the connection object is created.

    :type instance_id: :class:`str`
    :param instance_id: ID of the instance that hosts the database.

    :type database_id: :class:`str`
    :param database_id: Name of the database to connect to.

    :type project: :class:`str`
    :param project: (Optional) ID of the project owning the instance. It is
                    passed to the Spanner client unchanged; when omitted the
                    client is expected to determine it from the environment.

    :type credentials: :class:`google.auth.credentials.Credentials`
    :param credentials: (Optional) Authorization credentials attached to
                        requests. It is passed to the Spanner client
                        unchanged; when omitted the client is expected to
                        ascertain them from the environment.

    :type pool: Concrete subclass of
                :class:`~google.cloud.spanner_v1.pool.AbstractSessionPool`.
    :param pool: (Optional) Session pool handed to the database object.

    :type user_agent: :class:`str`
    :param user_agent: (Optional) User agent used to build the client info
                       sent with this connection's requests.

    :rtype: :class:`google.cloud.spanner_dbapi.connection.Connection`
    :returns: Connection bound to the requested instance and database.

    :raises: :class:`ValueError` if the instance or the database does not
             exist.
    """
    spanner_client = spanner_v1.Client(
        project=project,
        credentials=credentials,
        client_info=google_client_info(user_agent),
    )

    # Fail early, with a readable message, when the instance is missing.
    spanner_instance = spanner_client.instance(instance_id)
    instance_found = spanner_instance.exists()
    if not instance_found:
        raise ValueError("instance '%s' does not exist." % instance_id)

    # Same check for the database, which is created with the caller's pool.
    spanner_database = spanner_instance.database(database_id, pool=pool)
    database_found = spanner_database.exists()
    if not database_found:
        raise ValueError("database '%s' does not exist." % database_id)

    return Connection(spanner_instance, spanner_database)


__all__ = [
"Connection",
"connect",
"Cursor",
"DatabaseError",
"DataError",
"Error",
Expand All @@ -120,7 +66,6 @@ def connect(
"Warning",
"DEFAULT_USER_AGENT",
"apilevel",
"connect",
"paramstyle",
"threadsafety",
"get_param_types",
Expand Down
159 changes: 159 additions & 0 deletions google/cloud/spanner_dbapi/_helpers.py
@@ -0,0 +1,159 @@
# Copyright 2020 Google LLC
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd

from google.cloud.spanner_dbapi.parse_utils import get_param_types
from google.cloud.spanner_dbapi.parse_utils import parse_insert
from google.cloud.spanner_dbapi.parse_utils import sql_pyformat_args_to_spanner
from google.cloud.spanner_v1 import param_types


# Query that selects the `table_name` of every row of
# `information_schema.tables` whose catalog and schema are both the empty
# string.
SQL_LIST_TABLES = """
SELECT
t.table_name
FROM
information_schema.tables AS t
WHERE
t.table_catalog = '' and t.table_schema = ''
"""

# Query that selects the name, nullability flag and Spanner type of each
# column of one table in the empty-string schema. The table is supplied
# through the `@table_name` query parameter.
SQL_GET_TABLE_COLUMN_SCHEMA = """SELECT
COLUMN_NAME, IS_NULLABLE, SPANNER_TYPE
FROM
INFORMATION_SCHEMA.COLUMNS
WHERE
TABLE_SCHEMA = ''
AND
TABLE_NAME = @table_name
"""

# Maps Spanner type codes to the size of the corresponding data type, as per
# https://cloud.google.com/spanner/docs/data-types#allowable-types
# It is used to fill in `display_size` for a known type in Cursor.description
# after a row fetch.
# ResultSetMetadata
# https://cloud.google.com/spanner/docs/reference/rest/v1/ResultSetMetadata
# does not send back the actual size, so the size has to be looked up here.
# Types whose size depends on the stored data have no fixed size and are
# therefore not listed.
code_to_display_size = {
param_types.BOOL.code: 1,
param_types.DATE.code: 4,
param_types.FLOAT64.code: 8,
param_types.INT64.code: 8,
param_types.TIMESTAMP.code: 12,
}


def _execute_insert_heterogenous(transaction, sql_params_list):
    """Run every ``(sql, params)`` pair as its own statement.

    :param transaction: Transaction object exposing ``execute_sql``.
    :param sql_params_list: Iterable of ``(sql, params)`` pairs in pyformat
                            style; each pair is converted with
                            ``sql_pyformat_args_to_spanner`` before it runs.
    """
    for statement, arguments in sql_params_list:
        statement, arguments = sql_pyformat_args_to_spanner(
            statement, arguments
        )
        argument_types = get_param_types(arguments)
        result = transaction.execute_sql(
            statement, params=arguments, param_types=argument_types
        )
        # TODO: File a bug with Cloud Spanner and the Python client maintainers
        # about a lost commit when the result isn't read from.
        # Until then the result is drained on purpose and thrown away.
        list(result)


def _execute_insert_homogenous(transaction, parts):
    """Insert all rows with a single ``transaction.insert`` call.

    :param transaction: Transaction object exposing ``insert``.
    :param parts: Mapping read for the ``"table"``, ``"columns"`` and
                  ``"values"`` keys; a missing key is passed on as ``None``.
    :returns: Whatever ``transaction.insert`` returns.
    """
    table, columns, values = (
        parts.get(key) for key in ("table", "columns", "values")
    )
    return transaction.insert(table, columns, values)


def handle_insert(connection, sql, params):
    """Execute an INSERT statement inside a transaction on the connection.

    The statement is parsed with ``parse_insert`` and then dispatched to one
    of two strategies through ``connection.database.run_in_transaction``.

    :param connection: Object whose ``database`` attribute provides
                       ``run_in_transaction``.
    :param sql: The INSERT statement, with pyformat placeholders.
    :param params: Parameters for the statement.
    :returns: The result of ``run_in_transaction`` for the chosen strategy.
    """
    parsed = parse_insert(sql, params)

    # Why there are two strategies:
    #   A statement with plain pyformat placeholders and many value rows,
    #   e.g.
    #       SQL:    INSERT INTO T (f1, f2) VALUES (%s, %s, %s)
    #       Params: [(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,)]
    #   can be sent with a single RPC via
    #       transaction.insert(table, columns, values)
    #   whereas the general route,
    #       for sql, params in sql_params_list:
    #           transaction.execute_sql(sql, params, param_types)
    #   issues more RPCs and is therefore more costly.

    if not parsed.get("homogenous"):
        # Esoteric statements: each one must go through
        # transaction.execute_sql.
        statements = parsed.get("sql_params_list")
        return connection.database.run_in_transaction(
            _execute_insert_heterogenous, statements
        )

    # Common case: simple pyformat args, uploaded in one RPC.
    return connection.database.run_in_transaction(
        _execute_insert_homogenous, parsed
    )


class ColumnInfo:
    """Row column description object.

    Stores the seven descriptive attributes of a result column and exposes
    them both as named attributes and, through indexing, as a sequence in the
    order ``(name, type_code, display_size, internal_size, precision, scale,
    null_ok)``.

    :param name: Column name.
    :param type_code: Integer code of the column type (formatted with ``%d``
                      in the string representation).
    :param display_size: (Optional) Display size of the column.
    :param internal_size: (Optional) Internal size of the column.
    :param precision: (Optional) Precision of the column.
    :param scale: (Optional) Scale of the column.
    :param null_ok: Whether the column accepts NULL; defaults to ``False``.
    """

    def __init__(
        self,
        name,
        type_code,
        display_size=None,
        internal_size=None,
        precision=None,
        scale=None,
        null_ok=False,
    ):
        self.name = name
        self.type_code = type_code
        self.display_size = display_size
        self.internal_size = internal_size
        self.precision = precision
        self.scale = scale
        self.null_ok = null_ok

        # Snapshot of the attributes, taken once at construction time, that
        # backs positional access via ``__getitem__``.
        self.fields = (
            self.name,
            self.type_code,
            self.display_size,
            self.internal_size,
            self.precision,
            self.scale,
            self.null_ok,
        )

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return self.__str__()

    def __getitem__(self, index):
        """Return the field at ``index`` (or a slice) of ``self.fields``."""
        return self.fields[index]

    def __str__(self):
        """Return ``ColumnInfo(...)`` listing the attributes that are set.

        ``name`` and ``type_code`` are always shown. The optional size,
        precision and scale attributes are shown whenever they are not
        ``None``, so an explicit ``0`` (e.g. ``scale=0``) is reported rather
        than silently dropped. ``null_ok`` is shown only when truthy, which
        keeps the default ``False`` out of the output.
        """
        parts = [
            "name='%s'" % self.name,
            "type_code=%d" % self.type_code,
        ]
        if self.display_size is not None:
            parts.append("display_size=%d" % self.display_size)
        if self.internal_size is not None:
            parts.append("internal_size=%d" % self.internal_size)
        if self.precision is not None:
            parts.append("precision='%s'" % self.precision)
        if self.scale is not None:
            parts.append("scale='%s'" % self.scale)
        if self.null_ok:
            parts.append("null_ok='%s'" % self.null_ok)
        return "ColumnInfo(%s)" % ", ".join(parts)

0 comments on commit 927e178

Please sign in to comment.