Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement aborted transactions retry mechanism #1

Closed
wants to merge 13 commits into from
21 changes: 21 additions & 0 deletions google/cloud/spanner_v1/_helpers.py
Expand Up @@ -256,3 +256,24 @@ def _metadata_with_prefix(prefix, **kw):
List[Tuple[str, str]]: RPC metadata with supplied prefix
"""
return [("google-cloud-resource-prefix", prefix)]


def _compare_checksums(original, retried):
"""Compare the given checksums.

Raise an error if the given checksums have consumed
the same number of results, but are not equal.

:type original: :class:`~google.cloud.spanner_v1.transaction.ResultsChecksum`
:param original: results checksum of the original transaction.

:type retried: :class:`~google.cloud.spanner_v1.transaction.ResultsChecksum`
:param retried: results checksum of the retried transaction.

:raises: :exc:`RuntimeError` in case if checksums are not equal.
"""
if original is not None:
if len(retried) == len(original) and retried != original:
raise RuntimeError(
"The underlying data being changed while retrying an aborted transaction."
)
28 changes: 24 additions & 4 deletions google/cloud/spanner_v1/session.py
Expand Up @@ -278,9 +278,13 @@ def batch(self):

return Batch(self)

def transaction(self):
def transaction(self, original_results_checksum=None):
"""Create a transaction to perform a set of reads with shared staleness.

:type original_results_checksum: :class:`~google.cloud.spanner_v1.transaction.ResultsChecksum`
:param original_results_checksum: original transaction results
checksum.

:rtype: :class:`~google.cloud.spanner_v1.transaction.Transaction`
:returns: a transaction bound to this session
:raises ValueError: if the session has not yet been created.
Expand All @@ -292,12 +296,21 @@ def transaction(self):
self._transaction.rolled_back = True
del self._transaction
IlyaFaer marked this conversation as resolved.
Show resolved Hide resolved

txn = self._transaction = Transaction(self)
return txn
self._transaction = Transaction(
self, original_results_checksum=original_results_checksum
)
return self._transaction

def run_in_transaction(self, func, *args, **kw):
"""Perform a unit of work in a transaction, retrying on abort.

While executing the transaction operations a checksum
of their results is calculated. On aborted transaction
retry the checksum of the original transaction is compared
with the checksum of the retried transaction to ensure
the retried transaction has the same results as the
original one had before it was aborted.

:type func: callable
:param func: takes a required positional argument, the transaction,
and additional positional / keyword arguments as supplied
Expand All @@ -317,13 +330,16 @@ def run_in_transaction(self, func, *args, **kw):

:raises Exception:
reraises any non-ABORT exceptions raised by ``func``.
:raises: :exc:`RuntimeError` in case the data changed while
retrying an aborted transaction.
"""
deadline = time.time() + kw.pop("timeout_secs", DEFAULT_RETRY_TIMEOUT_SECS)
original_results_checksum = None
attempts = 0

while True:
if self._transaction is None:
txn = self.transaction()
txn = self.transaction(original_results_checksum)
else:
txn = self._transaction
if txn._transaction_id is None:
Expand All @@ -333,6 +349,8 @@ def run_in_transaction(self, func, *args, **kw):
attempts += 1
return_value = func(txn, *args, **kw)
except Aborted as exc:
if attempts == 1:
original_results_checksum = self._transaction.results_checksum
del self._transaction
_delay_until_retry(exc, deadline, attempts)
continue
Expand All @@ -346,6 +364,8 @@ def run_in_transaction(self, func, *args, **kw):
try:
txn.commit()
except Aborted as exc:
if attempts == 1:
original_results_checksum = self._transaction.results_checksum
del self._transaction
_delay_until_retry(exc, deadline, attempts)
except GoogleAPICallError:
Expand Down
34 changes: 30 additions & 4 deletions google/cloud/spanner_v1/snapshot.py
Expand Up @@ -171,9 +171,22 @@ def read(self, table, columns, keyset, index="", limit=0, partition=None):
self._read_request_count += 1

if self._multi_use:
return StreamedResultSet(iterator, source=self)
return StreamedResultSet(
iterator,
source=self,
results_checksum=getattr(self, "results_checksum", None),
original_results_checksum=getattr(
self, "_original_results_checksum", None
),
)
else:
return StreamedResultSet(iterator)
return StreamedResultSet(
iterator,
results_checksum=getattr(self, "results_checksum", None),
original_results_checksum=getattr(
self, "_original_results_checksum", None
),
)
IlyaFaer marked this conversation as resolved.
Show resolved Hide resolved

def execute_sql(
self,
Expand Down Expand Up @@ -278,9 +291,22 @@ def execute_sql(
self._execute_sql_count += 1

if self._multi_use:
return StreamedResultSet(iterator, source=self)
return StreamedResultSet(
iterator,
source=self,
results_checksum=getattr(self, "results_checksum", None),
original_results_checksum=getattr(
self, "_original_results_checksum", None
),
)
else:
return StreamedResultSet(iterator)
return StreamedResultSet(
iterator,
results_checksum=getattr(self, "results_checksum", None),
original_results_checksum=getattr(
self, "_original_results_checksum", None
),
)

def partition_read(
self,
Expand Down
27 changes: 25 additions & 2 deletions google/cloud/spanner_v1/streamed.py
Expand Up @@ -18,6 +18,7 @@
from google.protobuf.struct_pb2 import Value
from google.cloud import exceptions
from google.cloud.spanner_v1.proto import type_pb2
from google.cloud.spanner_v1._helpers import _compare_checksums
import six

# pylint: disable=ungrouped-imports
Expand All @@ -37,16 +38,32 @@ class StreamedResultSet(object):

:type source: :class:`~google.cloud.spanner_v1.snapshot.Snapshot`
:param source: Snapshot from which the result set was fetched.

:type results_checksum: :class:`~google.cloud.spanner_v1.transaction.ResultsChecksum`
:param results_checksum: A checksum to which streamed rows from this
result set must be added.

:type original_results_checksum: :class:`~google.cloud.spanner_v1.transaction.ResultsChecksum`
:param original_results_checksum: Results checksum of the original
transaction.
"""

def __init__(self, response_iterator, source=None):
def __init__(
    self,
    response_iterator,
    source=None,
    results_checksum=None,
    original_results_checksum=None,
):
    """Set up the streaming state for this result set."""
    self._response_iterator = response_iterator
    self._source = source  # snapshot the results were fetched from
    # Checksum bookkeeping used to validate retries of aborted transactions.
    self._results_checksum = results_checksum
    self._original_results_checksum = original_results_checksum
    # Streaming state, filled in as partial result sets arrive.
    self._metadata = None  # set from the first partial result set
    self._stats = None  # set from the last partial result set
    self._rows = []  # fully-processed rows
    self._current_row = []  # accumulated values of a not-yet-complete row
    self._pending_chunk = None  # partial value awaiting its continuation

@property
def fields(self):
Expand Down Expand Up @@ -143,7 +160,13 @@ def __iter__(self):
return
iter_rows, self._rows[:] = self._rows[:], ()
while iter_rows:
yield iter_rows.pop(0)
row = iter_rows.pop(0)
if self._results_checksum is not None:
self._results_checksum.consume_result(row)
_compare_checksums(
self._original_results_checksum, self._results_checksum
)
yield row
Comment on lines +163 to +169
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The java version actually consumes the results up to the same point that the user did before retrying before doing the comparison: https://github.com/googleapis/java-spanner/blob/059ef1ef1f03e80f4ff2705b45a62c553bb4e83d/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/ChecksumResultSet.java#L145-L153. Here we actually yield the rows used in the comparison. How come this doesn't result in the user getting the same rows twice after an internal retry?

It's also a bit confusing to see this do the checksum comparison with each row. I know _compare_checksums passes if it sees fewer retried rows than original, but that's not what I'd expect it to do without reading it.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@olavloite, @skuruppu mentioned you might be able to review and compare this to the java client behavior.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How come this doesn't result in the user getting the same rows twice after an internal retry?

Seems to me a user will not use the rows read within a transaction in operations outside the transaction until the transaction is finished. It probably sounds hard, so consider this case:

A user starts a transaction, reads several rows, then calls their own (non-Spanner) API, which uses these rows to write some data somewhere. Then they call more functions in the same transaction and commit it. This is an incorrect usage pattern — one should not act on data the transaction is using at that moment, because the transaction can fail and all the data/changes will be rolled back; the records made through the user's API will not be rolled back, so the user will end up with desynchronized data.

With this, the data that was read within transaction are considered as a temporary inexact-until-transaction-commit data, and if we're retrying a transaction, all that data should be re-read and recalculated, that's how I see it.

Copy link
Collaborator Author

@IlyaFaer IlyaFaer Sep 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's also a bit confusing to see this do the checksum comparison with each row. I know _compare_checksums passes if it sees fewer retried rows than original, but that's not what I'd expect it to do without reading it.

Yeah, that's something I'm not 100% like too, but retry logic looks too apportioned across classes and functions to me, so I've decided to make many things deep under the hood. Thus, in the transaction class we're not thinking about results number of a result set of one of the operations of this transaction, we're just calling == and it takes care about such a things deeper in the result set itself. Result set object is much more closer to these details than transaction.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have absolutely no Python experience, so bear with me if I misread or misunderstood (parts of) the code here, but if I understand it correctly the suggested implementation for Python here does the following (during the initial transaction):

  1. The __iter__ method will normally (i.e. in the original transaction) iterate over the PartialResultSets that are received from Spanner, and for each of these iterate over the rows in that PartialResultSet.
  2. If the ResultSet has a checksum, it will update the running checksum with the contents of the current row.
  3. It then checks whether the current checksum is equal to the checksum of the original transaction. If this is the original transaction and not a retry, the value of _original_results_checksum will be None and the comparison will always succeed.
  4. It then returns the row.

If a transaction is retried, the following happens:

  1. The __iter__ method will iterate over the PartialResultSets that are received from Spanner for the retried query, and for each of these iterate over the rows in that PartialResultSet.
  2. If the ResultSet has a checksum, it will update the running checksum with the contents of the current row.
  3. It then checks whether the current checksum is equal to the checksum of the original transaction. As far as I can see, this would always fail if the result set contained more than 1 row, as it is a running checksum.

Assume that a query returns the following rows during the original transaction:

Key Value
1 One
2 Two

During the original transaction the entire result set was consumed so the running checksum was calculated based on all the rows:

  • After row 1 the checksum is C1
  • After row 2 the checksum is C2
    So the original checksum is now C2.

During a retry the query will be retried and the checksum will be calculated again (I assume that the results during the retry are equal to the original query):

  • After row 1 the checksum is C1. That is compared to the original checksum that is C2 and the comparison will fail.

The checksum should therefore not be compared for each row, but after consuming the same number of rows during the retry as during original query.

(But I get a feeling that there is a more fundamental misunderstanding about this retry logic. I'll elaborate on that in a separate comment.)

Copy link
Collaborator Author

@IlyaFaer IlyaFaer Sep 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, @olavloite

It then checks whether the current checksum is equal to the checksum of the original transaction. As far as I can see, this would always fail if the result set contained more than 1 row, as it is a running checksum.

Yeah, that's the thing @c24t mentioned as something that we don't expect. In fact I'm redefining == operator for ResultsChecksum object, so that it compares checksums in this way:

if original is not None:
        if retried != original:
            if not retried < original:  # retried has less results

At the first line it checks if we're in retried transaction - if not, than we have nothing to compare.

The second checks if checksums are equal, considering their length (number of consumed results).

And the third line checks if the retried transaction came to the original transaction failure point.

So, if the given checksums have different length, it'll be considered like there are no problems so far, 'cause a) retried transaction is not at the point, where the original failed, so it's too soon to compare checksums; b) retried transaction went farther than the original transaction = the checksums were compared = they were equal at the original transaction failure point).

Looking at this, I think there can be a problem when retried transaction went farther than the original. I'll double check

Copy link
Collaborator Author

@IlyaFaer IlyaFaer Sep 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@c24t, hm-m, I think the logic here can be little bit simpler, if I'll use a __len__ magic method.

UPDATE: yes, it looks better, PTAL


def one(self):
"""Return exactly one result, or raise an exception.
Expand Down
70 changes: 69 additions & 1 deletion google/cloud/spanner_v1/transaction.py
Expand Up @@ -14,10 +14,14 @@

"""Spanner read-write transaction support."""

import hashlib
import pickle

from google.protobuf.struct_pb2 import Struct

from google.cloud._helpers import _pb_timestamp_to_datetime
from google.cloud.spanner_v1._helpers import (
_compare_checksums,
_make_value_pb,
_merge_query_options,
_metadata_with_prefix,
Expand All @@ -35,6 +39,10 @@ class Transaction(_SnapshotBase, _BatchBase):
:type session: :class:`~google.cloud.spanner_v1.session.Session`
:param session: the session used to perform the commit

:type original_results_checksum: :class:`~google.cloud.spanner_v1.transaction.ResultsChecksum`
:param original_results_checksum: results checksum of the
original transaction.

:raises ValueError: if session has an existing transaction
"""

Expand All @@ -44,11 +52,21 @@ class Transaction(_SnapshotBase, _BatchBase):
_multi_use = True
_execute_sql_count = 0

def __init__(self, session):
def __init__(self, session, original_results_checksum=None):
    # A session may carry at most one active transaction at a time.
    if session._transaction is not None:
        raise ValueError("Session has existing transaction.")

    super(Transaction, self).__init__(session)
    self._results_checksum = ResultsChecksum()  # this transaction results checksum
    # Checksum of the aborted original transaction when this one is a
    # retry; ``None`` when this transaction is not retrying anything.
    self._original_results_checksum = original_results_checksum

@property
def results_checksum(self):
    """
    Cumulative checksum of all the results returned
    by all the operations run within this transaction.

    :rtype: :class:`ResultsChecksum`
    :returns: this transaction's running results checksum.
    """
    return self._results_checksum

def _check_state(self):
"""Helper for :meth:`commit` et al.
Expand Down Expand Up @@ -232,6 +250,9 @@ def execute_update(
seqno=seqno,
metadata=metadata,
)
self._results_checksum.consume_result(response.stats.row_count_exact)

_compare_checksums(self._original_results_checksum, self._results_checksum)
return response.stats.row_count_exact

def batch_update(self, statements):
Expand Down Expand Up @@ -292,6 +313,9 @@ def batch_update(self, statements):
row_counts = [
result_set.stats.row_count_exact for result_set in response.result_sets
]
self._results_checksum.consume_result(row_counts)

_compare_checksums(self._original_results_checksum, self._results_checksum)
return response.status, row_counts

def __enter__(self):
Expand All @@ -305,3 +329,47 @@ def __exit__(self, exc_type, exc_val, exc_tb):
self.commit()
else:
self.rollback()


class ResultsChecksum:
    """Cumulative checksum.

    Used to calculate a total checksum of all the results
    returned by operations executed within a transaction.
    Includes methods for checksums comparison.

    These checksums are used while retrying an aborted
    transaction to check if the results of a retried transaction
    are equal to the results of the original transaction.
    """

    def __init__(self):
        self.checksum = hashlib.sha256()
        self.count = 0  # counter of consumed results

    def __len__(self):
        """Return the number of consumed results.

        Returns:
            int: The number of results.
        """
        return self.count

    def __eq__(self, other):
        """Check if checksums are equal.

        Args:
            other (ResultsChecksum):
                Another checksum to compare with this one.

        Returns:
            bool: True if the checksums are equal, False otherwise.
        """
        if not isinstance(other, ResultsChecksum):
            # Defer to Python's default comparison instead of raising
            # AttributeError when ``other`` has no ``checksum`` attribute.
            return NotImplemented
        return self.checksum.digest() == other.checksum.digest()

    def consume_result(self, result):
        """Add the given result into the checksum.

        Args:
            result (Union[int, list]):
                Streamed row or row count from an UPDATE operation.
        """
        self.checksum.update(pickle.dumps(result))
        self.count += 1
45 changes: 45 additions & 0 deletions tests/unit/test__helpers.py
Expand Up @@ -526,3 +526,48 @@ def test(self):
prefix = "prefix"
metadata = self._call_fut(prefix)
self.assertEqual(metadata, [("google-cloud-resource-prefix", prefix)])


class Test_compare_checksums(unittest.TestCase):
    """Unit tests for the ``_compare_checksums`` helper."""

    def _call_fut(self, *args, **kw):
        # Import at call time, following the repo convention, so a
        # broken import surfaces as a test failure rather than a
        # collection error.
        from google.cloud.spanner_v1._helpers import _compare_checksums

        return _compare_checksums(*args, **kw)

    def test_no_original_checksum(self):
        # No original checksum means this is not a retry: no-op.
        from google.cloud.spanner_v1.transaction import ResultsChecksum

        self.assertIsNone(self._call_fut(None, ResultsChecksum()))

    def test_equal(self):
        # Same length, same contents: comparison passes silently.
        from google.cloud.spanner_v1.transaction import ResultsChecksum

        original = ResultsChecksum()
        original.consume_result(5)

        retried = ResultsChecksum()
        retried.consume_result(5)

        self.assertIsNone(self._call_fut(original, retried))

    def test_less_results(self):
        # Retry consumed fewer results than the original: too soon
        # to compare, so no error is raised.
        from google.cloud.spanner_v1.transaction import ResultsChecksum

        original = ResultsChecksum()
        original.consume_result(5)

        retried = ResultsChecksum()

        self.assertIsNone(self._call_fut(original, retried))

    def test_mismatch(self):
        # Same length but different contents: underlying data changed.
        from google.cloud.spanner_v1.transaction import ResultsChecksum

        original = ResultsChecksum()
        original.consume_result(5)

        retried = ResultsChecksum()
        retried.consume_result(2)

        with self.assertRaises(RuntimeError):
            self._call_fut(original, retried)