refactor waiting to its own while loop

This allows for two things:
- we no longer recompute values that are irrelevant at this stage
- we no longer risk allowing the dispatcher to cancel the first request after we have started waiting for additional requests to arrive
bentoml · Mar 9, 2023 · cb82296 · cb82296
1 parent b4b7dc2
commit cb82296
Showing 1 changed file with 9 additions and 3 deletions.
diff --git a/src/bentoml/_internal/marshal/dispatcher.py b/src/bentoml/_internal/marshal/dispatcher.py
@@ -366,17 +366,23 @@ async def controller(self):
                         continue
                     await asyncio.sleep(self.tick_interval)
                     continue
-                if (
+
+                # we are now free to dispatch whenever we like
+                while (
                     # if we don't already have enough requests,
                     n < self.max_batch_size
                     # we are not about to cancel the first request,
-                    and latency_0 + dt > self.max_latency_in_ms * 0.95
+                    and latency_0 + dt <= self.max_latency_in_ms * 0.95
                     # and waiting will cause average latency to decrese
                     and n * (wn + dt + a) <= self.optimizer.wait * decay
                 ):
+                    n = len(self._queue)
+                    now = time.time()
+                    wn = now - self._queue[-1][0]
+                    latency_0 += dt
+
                     # wait for additional requests to arrive
                     await asyncio.sleep(self.tick_interval)
-                    continue
 
                 n_call_out = min(self.max_batch_size, n)
                 # call