Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: further avoid using proto-plus wrapper when unmarshalling entities #190

Merged
merged 7 commits into from Jul 9, 2021
Merged
93 changes: 31 additions & 62 deletions google/cloud/datastore/helpers.py
Expand Up @@ -22,6 +22,7 @@

from google.protobuf import struct_pb2
from google.type import latlng_pb2
from proto.datetime_helpers import DatetimeWithNanoseconds

from google.cloud._helpers import _datetime_to_pb_timestamp
from google.cloud.datastore_v1.types import datastore as datastore_pb2
Expand All @@ -33,8 +34,8 @@
def _get_meaning(value_pb, is_list=False):
"""Get the meaning from a protobuf value.

:type value_pb: :class:`.entity_pb2.Value`
:param value_pb: The protobuf value to be checked for an
:type value_pb: :class:`.entity_pb2.Value._pb`
:param value_pb: The *raw* protobuf value to be checked for an
associated meaning.

:type is_list: bool
Expand All @@ -47,14 +48,9 @@ def _get_meaning(value_pb, is_list=False):
means it just returns a list of meanings. If all the
list meanings agree, it just condenses them.
"""
meaning = None
if is_list:

values = (
value_pb._pb.array_value.values
if hasattr(value_pb, "_pb")
else value_pb.array_value.values
)
values = value_pb.array_value.values

# An empty list will have no values, hence no shared meaning
# set among them.
Expand All @@ -65,16 +61,18 @@ def _get_meaning(value_pb, is_list=False):
# the rest which may be enum/int values.
all_meanings = [_get_meaning(sub_value_pb) for sub_value_pb in values]
unique_meanings = set(all_meanings)

if len(unique_meanings) == 1:
# If there is a unique meaning, we preserve it.
meaning = unique_meanings.pop()
return unique_meanings.pop()
else: # We know len(value_pb.array_value.values) > 0.
# If the meaning is not unique, just return all of them.
meaning = all_meanings
return all_meanings

elif value_pb.meaning: # Simple field (int32).
meaning = value_pb.meaning
return value_pb.meaning

return meaning
return None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make sure I'm following, do the edits to _get_meaning() impact performance, or is this just for readability?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a big hit: the three-item getattr can be much faster than a try: ... except AttributeError:, but likely not the conditional assignment. OTOH, since in later commits we no longer risk handing the proto-plus wrapper into _get_meaning, we could likely pull that getattr out. The other changes to _get_meaning are micro-optimizaitons, which neither helped nor hurt on my comparison test.



def _new_value_pb(entity_pb, name):
Expand All @@ -89,29 +87,12 @@ def _new_value_pb(entity_pb, name):
:rtype: :class:`.entity_pb2.Value`
:returns: The new ``Value`` protobuf that was added to the entity.
"""
properties = entity_pb.properties
try:
properties = properties._pb
except AttributeError:
# TODO(microgenerator): shouldn't need this. the issue is that
# we have wrapped and non-wrapped protos coming here.
pass
# TODO(microgenerator): shouldn't need this. the issue is that
# we have wrapped and non-wrapped protos coming here.
properties = getattr(entity_pb.properties, "_pb", entity_pb.properties)
return properties.get_or_create(name)


def _property_tuples(entity_pb):
"""Iterator of name, ``Value`` tuples from entity properties.

:type entity_pb: :class:`.entity_pb2.Entity`
:param entity_pb: An entity protobuf to add a new property to.

:rtype: :class:`generator`
:returns: An iterator that yields tuples of a name and ``Value``
corresponding to properties on the entity.
"""
return iter(entity_pb.properties.items())


def entity_from_protobuf(pb):
"""Factory method for creating an entity based on a protobuf.

Expand All @@ -124,21 +105,18 @@ def entity_from_protobuf(pb):
:rtype: :class:`google.cloud.datastore.entity.Entity`
:returns: The entity derived from the protobuf.
"""
if not isinstance(pb, entity_pb2.Entity):
proto_pb = entity_pb2.Entity.wrap(pb)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the removal of this line and focusing entirely on raw protobuf objects what delivers the final performance gains?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, indeed. The proto-plus wrappers are an enormous perf-suck. Most of the win in this PR is avoiding touching them anywhere except at the outermost entry point, and then unwrapping there ASAP.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had not pieced together that we could avoid .wrap() altogether. The last time I was in here, I'd mistakenly assumed calling that (which is at least the least-slow proto-plus entrypoint) was still required.

Very nice!

else:
proto_pb = pb
if isinstance(pb, entity_pb2.Entity):
pb = pb._pb

key = None
if "key" in proto_pb: # Message field (Key)
key = key_from_protobuf(proto_pb.key)
if pb.HasField("key"): # Message field (Key)
key = key_from_protobuf(pb.key)

entity_props = {}
entity_meanings = {}
exclude_from_indexes = []

for prop_name, value_pb in _property_tuples(proto_pb._pb):
for prop_name, value_pb in pb.properties.items():
value = _get_value_from_value_pb(value_pb)
entity_props[prop_name] = value

Expand Down Expand Up @@ -384,7 +362,7 @@ def _pb_attr_value(val):
return name + "_value", value


def _get_value_from_value_pb(value):
def _get_value_from_value_pb(pb):
"""Given a protobuf for a Value, get the correct value.

The Cloud Datastore Protobuf API returns a Property Protobuf which
Expand All @@ -394,56 +372,47 @@ def _get_value_from_value_pb(value):
Some work is done to coerce the return value into a more useful type
(particularly in the case of a timestamp value, or a key value).

:type value_pb: :class:`.entity_pb2.Value`
:param value_pb: The Value Protobuf.
:type pb: :class:`.entity_pb2.Value._pb`
:param pb: The *raw* Value Protobuf.

:rtype: object
:returns: The value provided by the Protobuf.
:raises: :class:`ValueError <exceptions.ValueError>` if no value type
has been set.
"""
if not getattr(value, "_pb", False):
# Coerce raw pb type into proto-plus pythonic type.
value = entity_pb2.Value.wrap(value)

value_type = value._pb.WhichOneof("value_type")
value_type = pb.WhichOneof("value_type")

if value_type == "timestamp_value":
# Do not access `._pb` here, as that returns a Timestamp proto,
# but this should return a Pythonic `DatetimeWithNanoseconds` value,
# which is found at `value.timestamp_value`
result = value.timestamp_value
result = DatetimeWithNanoseconds.from_timestamp_pb(pb.timestamp_value)

elif value_type == "key_value":
result = key_from_protobuf(value._pb.key_value)
result = key_from_protobuf(pb.key_value)

elif value_type == "boolean_value":
result = value._pb.boolean_value
result = pb.boolean_value

elif value_type == "double_value":
result = value._pb.double_value
result = pb.double_value

elif value_type == "integer_value":
result = value._pb.integer_value
result = pb.integer_value

elif value_type == "string_value":
result = value._pb.string_value
result = pb.string_value

elif value_type == "blob_value":
result = value._pb.blob_value
result = pb.blob_value

elif value_type == "entity_value":
result = entity_from_protobuf(value._pb.entity_value)
result = entity_from_protobuf(pb.entity_value)

elif value_type == "array_value":
result = [
_get_value_from_value_pb(value) for value in value._pb.array_value.values
_get_value_from_value_pb(item_value) for item_value in pb.array_value.values
]

elif value_type == "geo_point_value":
result = GeoPoint(
value._pb.geo_point_value.latitude, value._pb.geo_point_value.longitude,
)
result = GeoPoint(pb.geo_point_value.latitude, pb.geo_point_value.longitude,)

elif value_type == "null_value":
result = None
Expand Down
4 changes: 1 addition & 3 deletions tests/unit/test_batch.py
Expand Up @@ -118,8 +118,6 @@ def test_put_entity_w_partial_key(self):
self.assertEqual(batch._partial_key_entities, [entity])

def test_put_entity_w_completed_key(self):
from google.cloud.datastore.helpers import _property_tuples

project = "PROJECT"
properties = {"foo": "bar", "baz": "qux", "spam": [1, 2, 3], "frotz": []}
client = _Client(project)
Expand All @@ -134,7 +132,7 @@ def test_put_entity_w_completed_key(self):
mutated_entity = _mutated_pb(self, batch.mutations, "upsert")
self.assertEqual(mutated_entity.key, key._key)

prop_dict = dict(_property_tuples(mutated_entity))
prop_dict = dict(mutated_entity.properties.items())
self.assertEqual(len(prop_dict), 4)
self.assertFalse(prop_dict["foo"].exclude_from_indexes)
self.assertTrue(prop_dict["baz"].exclude_from_indexes)
Expand Down
7 changes: 2 additions & 5 deletions tests/unit/test_client.py
Expand Up @@ -802,7 +802,6 @@ def test_put_multi_w_single_empty_entity(self):

def test_put_multi_no_batch_w_partial_key_w_retry_w_timeout(self):
from google.cloud.datastore_v1.types import datastore as datastore_pb2
from google.cloud.datastore.helpers import _property_tuples

entity = _Entity(foo=u"bar")
key = entity.key = _Key(_Key.kind, None)
Expand Down Expand Up @@ -838,15 +837,13 @@ def test_put_multi_no_batch_w_partial_key_w_retry_w_timeout(self):
mutated_entity = _mutated_pb(self, mutations, "insert")
self.assertEqual(mutated_entity.key, key.to_protobuf())

prop_list = list(_property_tuples(mutated_entity))
prop_list = list(mutated_entity.properties.items())
self.assertTrue(len(prop_list), 1)
name, value_pb = prop_list[0]
self.assertEqual(name, "foo")
self.assertEqual(value_pb.string_value, u"bar")

def test_put_multi_existing_batch_w_completed_key(self):
from google.cloud.datastore.helpers import _property_tuples

creds = _make_credentials()
client = self._make_one(credentials=creds)
entity = _Entity(foo=u"bar")
Expand All @@ -859,7 +856,7 @@ def test_put_multi_existing_batch_w_completed_key(self):
mutated_entity = _mutated_pb(self, CURR_BATCH.mutations, "upsert")
self.assertEqual(mutated_entity.key, key.to_protobuf())

prop_list = list(_property_tuples(mutated_entity))
prop_list = list(mutated_entity.properties.items())
self.assertTrue(len(prop_list), 1)
name, value_pb = prop_list[0]
self.assertEqual(name, "foo")
Expand Down