Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Implement a single partition publisher (#8)
* feat: Implement SerialBatcher which helps with transforming single writes into batch writes. * feat: Implement SinglePartitionPublisher which publishes to a single partition and handles retries.
- Loading branch information
1 parent
a6dc15f
commit fd1d76f
Showing
9 changed files
with
489 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from abc import ABC, abstractmethod | ||
from google.cloud.pubsublite_v1.types import PubSubMessage | ||
from google.cloud.pubsublite.publish_metadata import PublishMetadata | ||
|
||
|
||
class Publisher(ABC):
    """Abstract async-context-managed interface for publishing a message."""

    @abstractmethod
    async def __aenter__(self):
        """Enter the async context; concrete publishers must override this."""
        raise NotImplementedError()

    @abstractmethod
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Leave the async context; concrete publishers must override this."""
        raise NotImplementedError()

    @abstractmethod
    async def publish(self, message: PubSubMessage) -> PublishMetadata:
        """
        Publish the provided message.

        Args:
          message: The message to be published.

        Returns:
          Metadata about the published message.

        Raises:
          GoogleAPICallError: On a permanent error.
        """
        raise NotImplementedError()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
146 changes: 146 additions & 0 deletions
146
google/cloud/pubsublite/internal/wire/single_partition_publisher.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
import asyncio | ||
from typing import Optional, List, Iterable | ||
|
||
from absl import logging | ||
from google.cloud.pubsublite.internal.wire.publisher import Publisher | ||
from google.cloud.pubsublite.internal.wire.retrying_connection import RetryingConnection, ConnectionFactory | ||
from google.api_core.exceptions import FailedPrecondition, GoogleAPICallError | ||
from google.cloud.pubsublite.internal.wire.connection_reinitializer import ConnectionReinitializer | ||
from google.cloud.pubsublite.internal.wire.connection import Connection | ||
from google.cloud.pubsublite.internal.wire.serial_batcher import SerialBatcher, BatchTester | ||
from google.cloud.pubsublite.partition import Partition | ||
from google.cloud.pubsublite.publish_metadata import PublishMetadata | ||
from google.cloud.pubsublite_v1.types import PubSubMessage, Cursor, PublishRequest, PublishResponse, \ | ||
InitialPublishRequest | ||
from google.cloud.pubsublite.internal.wire.work_item import WorkItem | ||
|
||
# Maximum bytes per batch at 3.5 MiB to avoid gRPC limit of 4 MiB
_MAX_BYTES = int(3.5 * 1024 * 1024)

# Maximum messages per batch at 1000
_MAX_MESSAGES = 1000


class SinglePartitionPublisher(Publisher, ConnectionReinitializer[PublishRequest, PublishResponse], BatchTester[PubSubMessage]):
    """
    Publisher that sends messages to a single partition over a RetryingConnection.

    Individual publish() calls are collected by a SerialBatcher and written to the
    stream as aggregate PublishRequests. Batches that have been written but not
    yet acknowledged are kept, in order, in `_outstanding_writes` so that they can
    be re-sent when the connection is reinitialized.
    """
    _initial: InitialPublishRequest
    _flush_seconds: float
    _connection: RetryingConnection[PublishRequest, PublishResponse]

    _batcher: SerialBatcher[PubSubMessage, Cursor]
    # Batches already handed to the connection, oldest first.
    _outstanding_writes: List[List[WorkItem[PubSubMessage, Cursor]]]

    # Background tasks; None whenever the loopers are not running.
    _receiver: Optional[asyncio.Future]
    _flusher: Optional[asyncio.Future]

    def __init__(self, initial: InitialPublishRequest, flush_seconds: float,
                 factory: ConnectionFactory[PublishRequest, PublishResponse]):
        """
        Args:
          initial: The initial request sent first on every (re)established stream.
          flush_seconds: Interval, in seconds, between periodic batch flushes.
          factory: Factory handed to the RetryingConnection to create streams.
        """
        self._initial = initial
        self._flush_seconds = flush_seconds
        self._connection = RetryingConnection(factory, self)
        self._batcher = SerialBatcher(self)
        self._outstanding_writes = []
        self._receiver = None
        self._flusher = None

    @property
    def _partition(self) -> Partition:
        """The partition named in the initial publish request."""
        return Partition(self._initial.partition)

    @staticmethod
    def _aggregate_request(batch: List[WorkItem[PubSubMessage, Cursor]]) -> PublishRequest:
        """Build the single PublishRequest carrying every message of `batch`."""
        aggregate = PublishRequest()
        aggregate.message_publish_request.messages = [item.request for item in batch]
        return aggregate

    async def __aenter__(self):
        await self._connection.__aenter__()

    def _start_loopers(self):
        """Start the receive and flush background tasks; they must not already be running."""
        assert self._receiver is None
        assert self._flusher is None
        self._receiver = asyncio.ensure_future(self._receive_loop())
        self._flusher = asyncio.ensure_future(self._flush_loop())

    async def _stop_loopers(self):
        """Cancel the background tasks (if any) and wait for each to finish."""
        if self._receiver:
            self._receiver.cancel()
            await self._receiver
            self._receiver = None
        if self._flusher:
            self._flusher.cancel()
            await self._flusher
            self._flusher = None

    def _handle_response(self, response: PublishResponse):
        """
        Complete the oldest outstanding batch using the start cursor in `response`.

        Each message of the batch is assigned a consecutive offset beginning at the
        response's start cursor. An unexpected response fails the connection.
        """
        if "message_response" not in response:
            self._connection.fail(FailedPrecondition("Received an invalid subsequent response on the publish stream."))
            # Nothing valid to process; do not touch the outstanding batches.
            return
        if not self._outstanding_writes:
            self._connection.fail(
                FailedPrecondition("Received an publish response on the stream with no outstanding publishes."))
            # Popping from the empty list below would raise IndexError.
            return
        next_offset: int = response.message_response.start_cursor.offset
        batch: List[WorkItem[PubSubMessage, Cursor]] = self._outstanding_writes.pop(0)
        for item in batch:
            item.response_future.set_result(Cursor(offset=next_offset))
            next_offset += 1

    async def _receive_loop(self):
        """Read responses from the connection forever, until cancelled."""
        try:
            while True:
                response = await self._connection.read()
                self._handle_response(response)
        except asyncio.CancelledError:
            return

    async def _flush_loop(self):
        """Flush the batcher every `_flush_seconds`, until cancelled."""
        try:
            while True:
                await asyncio.sleep(self._flush_seconds)
                await self._flush()
        except asyncio.CancelledError:
            return

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Fail outstanding work if the connection has errored, else flush; then close the connection."""
        # NOTE(review): the receive/flush loopers are not stopped here; confirm
        # whether RetryingConnection.__aexit__ is expected to take care of that.
        if self._connection.error():
            self._fail_if_retrying_failed()
        else:
            await self._flush()
        await self._connection.__aexit__(exc_type, exc_val, exc_tb)

    def _fail_if_retrying_failed(self):
        """
        If the connection reports an error, fail every still-pending outstanding write with it.

        Safe to call repeatedly: futures that are already done are skipped and the
        outstanding list is emptied, so a later call (e.g. a failed flush followed
        by __aexit__) cannot raise asyncio.InvalidStateError.
        """
        error = self._connection.error()
        if not error:
            return
        for batch in self._outstanding_writes:
            for item in batch:
                if not item.response_future.done():
                    item.response_future.set_exception(error)
        self._outstanding_writes.clear()

    async def _flush(self):
        """Take the current batch from the batcher, record it as outstanding and write it."""
        batch = self._batcher.flush()
        if not batch:
            return
        # Recorded before writing so that reinitialize() can re-send it.
        self._outstanding_writes.append(batch)
        try:
            await self._connection.write(self._aggregate_request(batch))
        except GoogleAPICallError as e:
            logging.debug(f"Failed publish on stream: {e}")
            self._fail_if_retrying_failed()

    async def publish(self, message: PubSubMessage) -> PublishMetadata:
        """
        Add `message` to the current batch and wait for its cursor.

        Flushes immediately when the batcher reports that it should be flushed.

        Returns:
          The partition and the cursor assigned to the message.

        Raises:
          Whatever exception the message's response future was failed with
          (the connection's error, see _fail_if_retrying_failed).
        """
        cursor_future = self._batcher.add(message)
        if self._batcher.should_flush():
            await self._flush()
        return PublishMetadata(self._partition, await cursor_future)

    async def reinitialize(self, connection: Connection[PublishRequest, PublishResponse]):
        """
        Set up a fresh stream: send the initial request, validate the initial
        response, re-send all outstanding batches and restart the loopers.
        """
        await self._stop_loopers()
        await connection.write(PublishRequest(initial_request=self._initial))
        response = await connection.read()
        if "initial_response" not in response:
            self._connection.fail(FailedPrecondition("Received an invalid initial response on the publish stream."))
            # The handshake failed; do not re-send data or restart the loopers.
            return
        for batch in self._outstanding_writes:
            await connection.write(self._aggregate_request(batch))
        self._start_loopers()

    def test(self, requests: Iterable[PubSubMessage]) -> bool:
        """
        Report whether `requests` has reached a batch limit.

        Returns:
          True when the number of messages is at least _MAX_MESSAGES or their
          summed serialized size is at least _MAX_BYTES.
        """
        request_count = 0
        byte_count = 0
        # Single pass: `requests` is only promised to be an Iterable.
        for req in requests:
            request_count += 1
            byte_count += PubSubMessage.pb(req).ByteSize()
        return (request_count >= _MAX_MESSAGES) or (byte_count >= _MAX_BYTES)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,14 @@ | ||
import asyncio | ||
from typing import Generic, TypeVar | ||
from typing import Generic | ||
|
||
T = TypeVar('T') | ||
from google.cloud.pubsublite.internal.wire.connection import Request, Response | ||
|
||
|
||
class WorkItem(Generic[Request, Response]):
    """An item of work and a future to complete when it is finished."""

    # The request this work item carries.
    request: Request
    # Future to be completed with the response (or an exception) for `request`.
    response_future: "asyncio.Future[Response]"

    def __init__(self, request: Request):
        """
        Args:
          request: The request to carry; a new, pending future is created for it.
        """
        self.request = request
        self.response_future = asyncio.Future()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from typing import NamedTuple | ||
from google.cloud.pubsublite_v1.types.common import Cursor | ||
from google.cloud.pubsublite.partition import Partition | ||
|
||
|
||
class PublishMetadata(NamedTuple):
    """Pairing of a partition with a cursor, returned for a published message."""

    partition: Partition
    cursor: Cursor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,31 @@ | ||
from typing import List, Union, Any | ||
import asyncio | ||
from typing import List, Union, Any, TypeVar, Generic, Optional | ||
|
||
T = TypeVar("T") | ||
|
||
|
||
async def async_iterable(elts: List[Union[Any, Exception]]):
    """
    Asynchronously yield the entries of `elts` in order.

    An entry that is an Exception instance is raised instead of yielded, which
    ends the iteration at that point.
    """
    for entry in elts:
        if not isinstance(entry, Exception):
            yield entry
            continue
        raise entry
|
||
|
||
def make_queue_waiter(started_q: "asyncio.Queue[None]", result_q: "asyncio.Queue[Union[T, Exception]]"):
    """
    Build an async callable driven by two queues.

    Each call of the returned waiter first puts None on started_q to signal that
    it has begun, then takes the next entry from result_q. An Exception entry is
    raised; anything else is returned. Arguments passed to the waiter are ignored.
    """

    async def waiter(*args, **kwargs):
        await started_q.put(None)
        outcome = await result_q.get()
        if not isinstance(outcome, Exception):
            return outcome
        raise outcome

    return waiter
|
||
|
||
class Box(Generic[T]):
    """Generic holder that declares a single, optional `val` attribute."""

    val: Optional[T]
Oops, something went wrong.