Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: STRUCT and ARRAY support #318

Merged
merged 37 commits into from Sep 9, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
52cee8c
feat: STRUCT and ARRAY support
jimfulton Aug 30, 2021
a0b02f7
Merge branch 'main' into struct
jimfulton Aug 30, 2021
6bacc0d
Fixed test that expected JSON rather than STRUCT
jimfulton Aug 31, 2021
1ec0f88
Merge branch 'struct' of github.com:jimfulton/python-bigquery-sqlalch…
jimfulton Aug 31, 2021
74aab64
Added system test I neglected to check in before :(
jimfulton Aug 31, 2021
c5653e2
blacken
jimfulton Aug 31, 2021
a7f0b41
Merge branch 'main' into struct
jimfulton Aug 31, 2021
9df1804
Don't strip <ARRAY > from parameter types
jimfulton Aug 31, 2021
0df1701
Added system tests to verify PR 67 and issue 233
jimfulton Aug 31, 2021
7aad07f
Merge branch 'struct' of github.com:jimfulton/python-bigquery-sqlalch…
jimfulton Aug 31, 2021
f10a571
blacken
jimfulton Aug 31, 2021
ec31040
Renamed test file to conform to samples test-file naming conventions
jimfulton Sep 1, 2021
accf762
Require google-cloud-bigquery 2.25.2 to get struct field-name undersc…
jimfulton Sep 1, 2021
ef5f891
Added STRUCT documentation
jimfulton Sep 1, 2021
cce9dbb
fix bigquery version
jimfulton Sep 1, 2021
290d955
Merge branch 'main' into struct
jimfulton Sep 1, 2021
b697df6
get blacken to leave sample code alone.
jimfulton Sep 1, 2021
6a278b9
Check in missing file :(
jimfulton Sep 1, 2021
bc62a56
Merge branch 'struct' of github.com:jimfulton/python-bigquery-sqlalch…
jimfulton Sep 1, 2021
84426bd
need sqla 1.4 for unnest
jimfulton Sep 1, 2021
587a0f7
fixed typo
jimfulton Sep 1, 2021
e6f4adf
Merge branch 'main' into struct
jimfulton Sep 2, 2021
ffb5aa9
Merge branch 'main' into struct
jimfulton Sep 2, 2021
47fa14f
Merge branch 'main' into struct
jimfulton Sep 3, 2021
402bbbe
Merge branch 'main' into struct
jimfulton Sep 7, 2021
5bf07b4
Update sqlalchemy_bigquery/_struct.py
jimfulton Sep 7, 2021
e937167
added STRUCT docstring
jimfulton Sep 7, 2021
8661f5b
Add doc link
jimfulton Sep 7, 2021
b550aa1
Merge branch 'struct' of github.com:jimfulton/python-bigquery-sqlalch…
jimfulton Sep 7, 2021
af68a54
Added some comments
jimfulton Sep 7, 2021
da43fd2
Localize logic for getting subtype column specifications
jimfulton Sep 8, 2021
f04cac2
explain semi-private name mangling
jimfulton Sep 8, 2021
5af05bb
Make name mangling more explicit
jimfulton Sep 8, 2021
09866c6
explain why we have different implementations of _field_index for SQL…
jimfulton Sep 8, 2021
054c227
get rid of cur_fields, we're not using it anymore.
jimfulton Sep 8, 2021
1a79305
Add a todo to find out why Sqlalchemy doesn't generate an alias when …
jimfulton Sep 8, 2021
5e2ae32
use `repr` rather than `str` to show an object in an error message
jimfulton Sep 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 25 additions & 17 deletions sqlalchemy_bigquery/_struct.py
Expand Up @@ -34,8 +34,13 @@
import sqlalchemy.sql.coercions
import sqlalchemy.sql.roles

# We have to delay getting the type compiler, because of circular imports. :(
type_compiler = None

def _get_subtype_col_spec(type_):
global _get_subtype_col_spec

type_compiler = base.dialect.type_compiler(base.dialect())
_get_subtype_col_spec = type_compiler.process
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fancy! I didn't realize a function could replace itself. I like it.

return _get_subtype_col_spec(type_)


class STRUCT(sqlalchemy.sql.sqltypes.Indexable, sqlalchemy.types.UserDefinedType):
tswast marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -55,30 +60,29 @@ def __init__(
# Note that because:
# https://docs.python.org/3/whatsnew/3.6.html#pep-468-preserving-keyword-argument-order
# We know that `kwfields` preserves order.
self.__fields = tuple(
self._STRUCT_fields = tuple(
(
name,
type_ if isinstance(type_, sqlalchemy.types.TypeEngine) else type_(),
)
for (name, type_) in (fields + tuple(kwfields.items()))
)

self.__byname = {name.lower(): type_ for (name, type_) in self.__fields}
self._STRUCT_byname = {
name.lower(): type_ for (name, type_) in self._STRUCT_fields
}

def __repr__(self):
fields = ", ".join(f"{name}={repr(type_)}" for name, type_ in self.__fields)
fields = ", ".join(
f"{name}={repr(type_)}" for name, type_ in self._STRUCT_fields
)
return f"STRUCT({fields})"

def get_col_spec(self, **kw):
global type_compiler

try:
process = type_compiler.process
except AttributeError:
type_compiler = base.dialect.type_compiler(base.dialect())
process = type_compiler.process

fields = ", ".join(f"{name} {process(type_)}" for name, type_ in self.__fields)
fields = ", ".join(
f"{name} {_get_subtype_col_spec(type_)}"
for name, type_ in self._STRUCT_fields
)
return f"STRUCT<{fields}>"

def bind_processor(self, dialect):
Expand All @@ -89,22 +93,26 @@ def _setup_getitem(self, name):
if not isinstance(name, str):
raise TypeError(
f"STRUCT fields can only be accessed with strings field names,"
f" not {name}."
f" not {repr(name)}."
)
subtype = self.expr.type._STRUCT__byname.get(name.lower())
subtype = self.expr.type._STRUCT_byname.get(name.lower())
if subtype is None:
raise KeyError(name)
operator = struct_getitem_op
index = _field_index(self, name, operator)
return operator, index, subtype

def __getattr__(self, name):
if name.lower() in self.expr.type._STRUCT__byname:
if name.lower() in self.expr.type._STRUCT_byname:
return self[name]

comparator_factory = Comparator


# In the implementations of _field_index below, we're stealing from
# the JSON type implementation, but the code to steal changed in
# 1.4. :/

if sqlalchemy_1_4_or_more:

tswast marked this conversation as resolved.
Show resolved Hide resolved
def _field_index(self, name, operator):
Expand Down
12 changes: 6 additions & 6 deletions sqlalchemy_bigquery/_types.py
Expand Up @@ -72,25 +72,25 @@
except NameError:
pass

STRUCT_FIELD_TYPES = "RECORD", "STRUCT"

def _get_transitive_schema_fields(fields, cur_fields):

def _get_transitive_schema_fields(fields):
"""
Recurse into record type and return all the nested field names.
As contributed by @sumedhsakdeo on issue #17
"""
results = []
for field in fields:
results += [field]
if field.field_type == "RECORD":
cur_fields.append(field)
if field.field_type in STRUCT_FIELD_TYPES:
sub_fields = [
SchemaField.from_api_repr(
dict(f.to_api_repr(), name=f"{field.name}.{f.name}")
)
for f in field.fields
]
results += _get_transitive_schema_fields(sub_fields, cur_fields)
cur_fields.pop()
results += _get_transitive_schema_fields(sub_fields)
return results


Expand Down Expand Up @@ -125,7 +125,7 @@ def _get_sqla_column_type(field):


def get_columns(bq_schema):
fields = _get_transitive_schema_fields(bq_schema, [])
fields = _get_transitive_schema_fields(bq_schema)
return [
{
"name": field.name,
Expand Down
6 changes: 4 additions & 2 deletions tests/unit/test__struct.py
Expand Up @@ -64,8 +64,10 @@ def _col():
(_col().NAME, "`t`.`person`.NAME"),
(_col().children, "`t`.`person`.children"),
(
_col().children[0].label("anon_1"), # SQLAlchemy doesn't add the label
# in this case for some reason
# SQLAlchemy doesn't add the label in this case for some reason.
# TODO: why?
# https://github.com/googleapis/python-bigquery-sqlalchemy/issues/336
_col().children[0].label("anon_1"),
"(`t`.`person`.children)[OFFSET(%(param_1:INT64)s)]",
),
(
Expand Down