Skip to content

Commit

Permalink
refactor waiting to its own while loop
Browse files Browse the repository at this point in the history
This allows for two things:
 - we no longer recompute values that are irrelevant at this stage
 - we no longer risk allowing the dispatcher to cancel the first
   request after we have started waiting for additional requests
   to arrive
  • Loading branch information
sauyon committed Mar 9, 2023
1 parent b4b7dc2 commit cb82296
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/bentoml/_internal/marshal/dispatcher.py
Expand Up @@ -366,17 +366,23 @@ async def controller(self):
continue
await asyncio.sleep(self.tick_interval)
continue
if (

# we are now free to dispatch whenever we like
while (
# if we don't already have enough requests,
n < self.max_batch_size
# we are not about to cancel the first request,
and latency_0 + dt > self.max_latency_in_ms * 0.95
and latency_0 + dt <= self.max_latency_in_ms * 0.95
# and waiting will cause average latency to decrease
and n * (wn + dt + a) <= self.optimizer.wait * decay
):
n = len(self._queue)
now = time.time()
wn = now - self._queue[-1][0]
latency_0 += dt

# wait for additional requests to arrive
await asyncio.sleep(self.tick_interval)
continue

n_call_out = min(self.max_batch_size, n)
# call
Expand Down

0 comments on commit cb82296

Please sign in to comment.