New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
perf: further avoid using proto-plus wrapper when unmarshalling entities #190
Changes from all commits
a755fff
018acd9
3f928e1
151aa17
544d0c1
9910960
5cbf27a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
|
||
from google.protobuf import struct_pb2 | ||
from google.type import latlng_pb2 | ||
from proto.datetime_helpers import DatetimeWithNanoseconds | ||
|
||
from google.cloud._helpers import _datetime_to_pb_timestamp | ||
from google.cloud.datastore_v1.types import datastore as datastore_pb2 | ||
|
@@ -33,8 +34,8 @@ | |
def _get_meaning(value_pb, is_list=False): | ||
"""Get the meaning from a protobuf value. | ||
|
||
:type value_pb: :class:`.entity_pb2.Value` | ||
:param value_pb: The protobuf value to be checked for an | ||
:type value_pb: :class:`.entity_pb2.Value._pb` | ||
:param value_pb: The *raw* protobuf value to be checked for an | ||
associated meaning. | ||
|
||
:type is_list: bool | ||
|
@@ -47,14 +48,9 @@ def _get_meaning(value_pb, is_list=False): | |
means it just returns a list of meanings. If all the | ||
list meanings agree, it just condenses them. | ||
""" | ||
meaning = None | ||
if is_list: | ||
|
||
values = ( | ||
value_pb._pb.array_value.values | ||
if hasattr(value_pb, "_pb") | ||
else value_pb.array_value.values | ||
) | ||
values = value_pb.array_value.values | ||
|
||
# An empty list will have no values, hence no shared meaning | ||
# set among them. | ||
|
@@ -65,16 +61,18 @@ def _get_meaning(value_pb, is_list=False): | |
# the rest which may be enum/int values. | ||
all_meanings = [_get_meaning(sub_value_pb) for sub_value_pb in values] | ||
unique_meanings = set(all_meanings) | ||
|
||
if len(unique_meanings) == 1: | ||
# If there is a unique meaning, we preserve it. | ||
meaning = unique_meanings.pop() | ||
return unique_meanings.pop() | ||
else: # We know len(value_pb.array_value.values) > 0. | ||
# If the meaning is not unique, just return all of them. | ||
meaning = all_meanings | ||
return all_meanings | ||
|
||
elif value_pb.meaning: # Simple field (int32). | ||
meaning = value_pb.meaning | ||
return value_pb.meaning | ||
|
||
return meaning | ||
return None | ||
|
||
|
||
def _new_value_pb(entity_pb, name): | ||
|
@@ -89,29 +87,12 @@ def _new_value_pb(entity_pb, name): | |
:rtype: :class:`.entity_pb2.Value` | ||
:returns: The new ``Value`` protobuf that was added to the entity. | ||
""" | ||
properties = entity_pb.properties | ||
try: | ||
properties = properties._pb | ||
except AttributeError: | ||
# TODO(microgenerator): shouldn't need this. the issue is that | ||
# we have wrapped and non-wrapped protos coming here. | ||
pass | ||
# TODO(microgenerator): shouldn't need this. the issue is that | ||
# we have wrapped and non-wrapped protos coming here. | ||
properties = getattr(entity_pb.properties, "_pb", entity_pb.properties) | ||
return properties.get_or_create(name) | ||
|
||
|
||
def _property_tuples(entity_pb): | ||
"""Iterator of name, ``Value`` tuples from entity properties. | ||
|
||
:type entity_pb: :class:`.entity_pb2.Entity` | ||
:param entity_pb: An entity protobuf to add a new property to. | ||
|
||
:rtype: :class:`generator` | ||
:returns: An iterator that yields tuples of a name and ``Value`` | ||
corresponding to properties on the entity. | ||
""" | ||
return iter(entity_pb.properties.items()) | ||
|
||
|
||
def entity_from_protobuf(pb): | ||
"""Factory method for creating an entity based on a protobuf. | ||
|
||
|
@@ -124,21 +105,18 @@ def entity_from_protobuf(pb): | |
:rtype: :class:`google.cloud.datastore.entity.Entity` | ||
:returns: The entity derived from the protobuf. | ||
""" | ||
if not isinstance(pb, entity_pb2.Entity): | ||
proto_pb = entity_pb2.Entity.wrap(pb) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the removal of this line and focusing entirely on raw protobuf objects what delivers the final performance gains? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, indeed. The proto-plus wrappers are an enormous perf-suck. Most of the win in this PR is avoiding touching them anywhere except at the outermost entry point, and then unwrapping there ASAP. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had not pieced together that we could avoid Very nice! |
||
else: | ||
proto_pb = pb | ||
if isinstance(pb, entity_pb2.Entity): | ||
pb = pb._pb | ||
|
||
key = None | ||
if "key" in proto_pb: # Message field (Key) | ||
key = key_from_protobuf(proto_pb.key) | ||
if pb.HasField("key"): # Message field (Key) | ||
key = key_from_protobuf(pb.key) | ||
|
||
entity_props = {} | ||
entity_meanings = {} | ||
exclude_from_indexes = [] | ||
|
||
for prop_name, value_pb in _property_tuples(proto_pb._pb): | ||
for prop_name, value_pb in pb.properties.items(): | ||
value = _get_value_from_value_pb(value_pb) | ||
entity_props[prop_name] = value | ||
|
||
|
@@ -384,7 +362,7 @@ def _pb_attr_value(val): | |
return name + "_value", value | ||
|
||
|
||
def _get_value_from_value_pb(value): | ||
def _get_value_from_value_pb(pb): | ||
"""Given a protobuf for a Value, get the correct value. | ||
|
||
The Cloud Datastore Protobuf API returns a Property Protobuf which | ||
|
@@ -394,56 +372,47 @@ def _get_value_from_value_pb(value): | |
Some work is done to coerce the return value into a more useful type | ||
(particularly in the case of a timestamp value, or a key value). | ||
|
||
:type value_pb: :class:`.entity_pb2.Value` | ||
:param value_pb: The Value Protobuf. | ||
:type pb: :class:`.entity_pb2.Value._pb` | ||
:param pb: The *raw* Value Protobuf. | ||
|
||
:rtype: object | ||
:returns: The value provided by the Protobuf. | ||
:raises: :class:`ValueError <exceptions.ValueError>` if no value type | ||
has been set. | ||
""" | ||
if not getattr(value, "_pb", False): | ||
# Coerce raw pb type into proto-plus pythonic type. | ||
value = entity_pb2.Value.wrap(value) | ||
|
||
value_type = value._pb.WhichOneof("value_type") | ||
value_type = pb.WhichOneof("value_type") | ||
|
||
if value_type == "timestamp_value": | ||
# Do not access `._pb` here, as that returns a Timestamp proto, | ||
# but this should return a Pythonic `DatetimeWithNanoseconds` value, | ||
# which is found at `value.timestamp_value` | ||
result = value.timestamp_value | ||
result = DatetimeWithNanoseconds.from_timestamp_pb(pb.timestamp_value) | ||
|
||
elif value_type == "key_value": | ||
result = key_from_protobuf(value._pb.key_value) | ||
result = key_from_protobuf(pb.key_value) | ||
|
||
elif value_type == "boolean_value": | ||
result = value._pb.boolean_value | ||
result = pb.boolean_value | ||
|
||
elif value_type == "double_value": | ||
result = value._pb.double_value | ||
result = pb.double_value | ||
|
||
elif value_type == "integer_value": | ||
result = value._pb.integer_value | ||
result = pb.integer_value | ||
|
||
elif value_type == "string_value": | ||
result = value._pb.string_value | ||
result = pb.string_value | ||
|
||
elif value_type == "blob_value": | ||
result = value._pb.blob_value | ||
result = pb.blob_value | ||
|
||
elif value_type == "entity_value": | ||
result = entity_from_protobuf(value._pb.entity_value) | ||
result = entity_from_protobuf(pb.entity_value) | ||
|
||
elif value_type == "array_value": | ||
result = [ | ||
_get_value_from_value_pb(value) for value in value._pb.array_value.values | ||
_get_value_from_value_pb(item_value) for item_value in pb.array_value.values | ||
] | ||
|
||
elif value_type == "geo_point_value": | ||
result = GeoPoint( | ||
value._pb.geo_point_value.latitude, value._pb.geo_point_value.longitude, | ||
) | ||
result = GeoPoint(pb.geo_point_value.latitude, pb.geo_point_value.longitude,) | ||
|
||
elif value_type == "null_value": | ||
result = None | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To make sure I'm following, do the edits to
_get_meaning()
impact performance, or is this just for readability?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a big hit: the three-item
getattr
can be much faster than atry: ... except AttributeError:
, but likely not the conditional assignment. OTOH, since in later commits we no longer risk handing the proto-plus wrapper into_get_meaning
, we could likely pull thatgetattr
out. The other changes to_get_meaning
are micro-optimizaitons, which neither helped nor hurt on my comparison test.