# single_partition_publisher.py
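"""Publisher implementation for a single Pub/Sub Lite partition.

Batches outgoing messages, writes them to a retrying bidirectional publish
stream, and resolves each published message's cursor (offset) from the
corresponding PublishResponse.
"""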
import asyncio
from typing import Optional, List, Iterable
from absl import logging
from google.cloud.pubsublite.internal.wire.publisher import Publisher
from google.cloud.pubsublite.internal.wire.retrying_connection import RetryingConnection, ConnectionFactory
from google.api_core.exceptions import FailedPrecondition, GoogleAPICallError
from google.cloud.pubsublite.internal.wire.connection_reinitializer import ConnectionReinitializer
from google.cloud.pubsublite.internal.wire.connection import Connection
from google.cloud.pubsublite.internal.wire.serial_batcher import SerialBatcher, BatchTester
from google.cloud.pubsublite.partition import Partition
from google.cloud.pubsublite.publish_metadata import PublishMetadata
from google.cloud.pubsublite_v1.types import (PubSubMessage, Cursor, PublishRequest, PublishResponse,
                                              InitialPublishRequest)
from google.cloud.pubsublite.internal.wire.work_item import WorkItem

# Maximum bytes per batch at 3.5 MiB to avoid GRPC limit of 4 MiB
_MAX_BYTES = int(3.5 * 1024 * 1024)

# Maximum messages per batch at 1000
_MAX_MESSAGES = 1000

class SinglePartitionPublisher(Publisher, ConnectionReinitializer[PublishRequest, PublishResponse], BatchTester[PubSubMessage]):
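    """Publishes messages to a single Pub/Sub Lite partition over a retrying stream.

    Messages are accumulated by a SerialBatcher and flushed either periodically
    (every `_flush_seconds`) or when the batcher reports that the batch is full.
    Outstanding batches are tracked so they can be resent in order whenever the
    underlying connection is reinitialized.
    """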
    _initial: InitialPublishRequest
    _flush_seconds: float
    _connection: RetryingConnection[PublishRequest, PublishResponse]
    _batcher: SerialBatcher[PubSubMessage, Cursor]
    _outstanding_writes: List[List[WorkItem[PubSubMessage, Cursor]]]
    _receiver: Optional[asyncio.Future]
    _flusher: Optional[asyncio.Future]

    def __init__(self, initial: InitialPublishRequest, flush_seconds: float,
                 factory: ConnectionFactory[PublishRequest, PublishResponse]):
        self._initial = initial
        self._flush_seconds = flush_seconds
        self._connection = RetryingConnection(factory, self)
        self._batcher = SerialBatcher(self)
        self._outstanding_writes = []
        self._receiver = None
        self._flusher = None

    @property
    def _partition(self) -> Partition:
        return Partition(self._initial.partition)

    async def __aenter__(self):
        await self._connection.__aenter__()
        return self

    def _start_loopers(self):
        assert self._receiver is None
        assert self._flusher is None
        self._receiver = asyncio.ensure_future(self._receive_loop())
        self._flusher = asyncio.ensure_future(self._flush_loop())

    async def _stop_loopers(self):
        if self._receiver:
            self._receiver.cancel()
            await self._receiver
            self._receiver = None
        if self._flusher:
            self._flusher.cancel()
            await self._flusher
            self._flusher = None

    def _handle_response(self, response: PublishResponse):
        """Resolves cursors for the oldest outstanding batch from a message response."""
        if "message_response" not in response:
            self._connection.fail(FailedPrecondition("Received an invalid subsequent response on the publish stream."))
        if not self._outstanding_writes:
            self._connection.fail(
                FailedPrecondition("Received a publish response on the stream with no outstanding publishes."))
        next_offset: int = response.message_response.start_cursor.offset
        batch: List[WorkItem[PubSubMessage, Cursor]] = self._outstanding_writes.pop(0)
        for item in batch:
            item.response_future.set_result(Cursor(offset=next_offset))
            next_offset += 1

    async def _receive_loop(self):
        try:
            while True:
                response = await self._connection.read()
                self._handle_response(response)
        except (asyncio.CancelledError, GoogleAPICallError):
            return

    async def _flush_loop(self):
        try:
            while True:
                await asyncio.sleep(self._flush_seconds)
                await self._flush()
        except asyncio.CancelledError:
            return

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self._connection.error():
            self._fail_if_retrying_failed()
        else:
            await self._flush()
        await self._stop_loopers()
        await self._connection.__aexit__(exc_type, exc_val, exc_tb)

    def _fail_if_retrying_failed(self):
        if self._connection.error():
            for batch in self._outstanding_writes:
                for item in batch:
                    item.response_future.set_exception(self._connection.error())

    async def _flush(self):
        """Sends the currently batched messages on the stream and records them as outstanding."""
        batch = self._batcher.flush()
        if not batch:
            return
        self._outstanding_writes.append(batch)
        aggregate = PublishRequest()
        aggregate.message_publish_request.messages = [item.request for item in batch]
        try:
            await self._connection.write(aggregate)
        except GoogleAPICallError as e:
            logging.debug(f"Failed publish on stream: {e}")
            self._fail_if_retrying_failed()

    async def publish(self, message: PubSubMessage) -> PublishMetadata:
        """Batches the message for publishing and waits for its cursor to be assigned."""
        cursor_future = self._batcher.add(message)
        if self._batcher.should_flush():
            await self._flush()
        return PublishMetadata(self._partition, await cursor_future)

    async def reinitialize(self, connection: Connection[PublishRequest, PublishResponse]):
        """Re-sends the initial request and all outstanding batches on a fresh stream."""
        await self._stop_loopers()
        await connection.write(PublishRequest(initial_request=self._initial))
        response = await connection.read()
        if "initial_response" not in response:
            self._connection.fail(FailedPrecondition("Received an invalid initial response on the publish stream."))
        for batch in self._outstanding_writes:
            aggregate = PublishRequest()
            aggregate.message_publish_request.messages = [item.request for item in batch]
            await connection.write(aggregate)
        self._start_loopers()

    def test(self, requests: Iterable[PubSubMessage]) -> bool:
        """Returns True if the pending messages have reached the batch byte or count limit."""
        request_count = 0
        byte_count = 0
        for req in requests:
            request_count += 1
            byte_count += PubSubMessage.pb(req).ByteSize()
        return (request_count >= _MAX_MESSAGES) or (byte_count >= _MAX_BYTES)
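
# A minimal usage sketch, not part of this module: `_make_publish_factory()` is a
# hypothetical stand-in for a ConnectionFactory[PublishRequest, PublishResponse]
# wired to the Pub/Sub Lite Publish stream; the other names are the types imported
# above, and the flush interval is an arbitrary example value.
#
#     initial = InitialPublishRequest(topic="projects/p/locations/us-central1-a/topics/t", partition=0)
#     async with SinglePartitionPublisher(initial, flush_seconds=0.05, factory=_make_publish_factory()) as publisher:
#         metadata = await publisher.publish(PubSubMessage(data=b"hello"))
#         # metadata.partition and metadata.cursor identify where the message landed.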