From 14d637be22b86d5714fa3b34060784c375f4860f Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Sat, 23 Mar 2024 10:09:14 -0700
Subject: [PATCH] memory limit padding + auto-scale crawler pods:

- set memory limit to 1.2x memory request to provide extra padding and avoid
  OOM
- attempt to resize crawler pods by 1.2 when exceeding 90% of available memory
- do a 'soft OOM' (send extra SIGTERM) to pod when reaching 100% of requested
  memory, resulting in faster graceful restart
---
 backend/btrixcloud/k8sapi.py                | 26 ++++++++++++
 backend/btrixcloud/operator/baseoperator.py | 19 ++++++---
 backend/btrixcloud/operator/crawls.py       | 46 ++++++++++++++++-----
 chart/app-templates/crawler.yaml            |  2 +-
 4 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/backend/btrixcloud/k8sapi.py b/backend/btrixcloud/k8sapi.py
index 578c22692..3ba7f965e 100644
--- a/backend/btrixcloud/k8sapi.py
+++ b/backend/btrixcloud/k8sapi.py
@@ -288,3 +288,29 @@ async def has_custom_jobs_with_label(self, plural, label) -> bool:
         # pylint: disable=broad-exception-caught
         except Exception:
             return False
+
+    async def send_signal_to_pod(self, pod_name, signame) -> bool:
+        """send signal to all pods"""
+        command = ["bash", "-c", f"kill -s {signame} 1"]
+        signaled = False
+
+        try:
+            print(f"Sending {signame} to {pod_name}", flush=True)
+
+            res = await self.core_api_ws.connect_get_namespaced_pod_exec(
+                name=pod_name,
+                namespace=self.namespace,
+                command=command,
+                stdout=True,
+            )
+            if res:
+                print("Result", res, flush=True)
+
+            else:
+                signaled = True
+
+        # pylint: disable=broad-except
+        except Exception as exc:
+            print(f"Send Signal Error: {exc}", flush=True)
+
+        return signaled
diff --git a/backend/btrixcloud/operator/baseoperator.py b/backend/btrixcloud/operator/baseoperator.py
index b06d8bf05..9a3c67db7 100644
--- a/backend/btrixcloud/operator/baseoperator.py
+++ b/backend/btrixcloud/operator/baseoperator.py
@@ -42,28 +42,35 @@ def compute_crawler_resources(self):
         """compute memory / cpu resources for crawlers"""
         p = self.shared_params
         num = max(int(p["crawler_browser_instances"]) - 1, 0)
+        crawler_cpu: float = 0
+        crawler_memory: int = 0
         print("crawler resources")
         if not p.get("crawler_cpu"):
             base = parse_quantity(p["crawler_cpu_base"])
             extra = parse_quantity(p["crawler_extra_cpu_per_browser"])
 
             # cpu is a floating value of cpu cores
-            p["crawler_cpu"] = float(base + num * extra)
+            crawler_cpu = float(base + num * extra)
 
-            print(f"cpu = {base} + {num} * {extra} = {p['crawler_cpu']}")
+            print(f"cpu = {base} + {num} * {extra} = {crawler_cpu}")
         else:
-            print(f"cpu = {p['crawler_cpu']}")
+            crawler_cpu = float(parse_quantity(p["crawler_cpu"]))
+            print(f"cpu = {crawler_cpu}")
 
         if not p.get("crawler_memory"):
             base = parse_quantity(p["crawler_memory_base"])
             extra = parse_quantity(p["crawler_extra_memory_per_browser"])
 
             # memory is always an int
-            p["crawler_memory"] = int(base + num * extra)
+            crawler_memory = int(base + num * extra)
 
-            print(f"memory = {base} + {num} * {extra} = {p['crawler_memory']}")
+            print(f"memory = {base} + {num} * {extra} = {crawler_memory}")
         else:
-            print(f"memory = {p['crawler_memory']}")
+            crawler_memory = int(parse_quantity(p["crawler_memory"]))
+            print(f"memory = {crawler_memory}")
+
+        p["crawler_cpu"] = crawler_cpu
+        p["crawler_memory"] = crawler_memory
 
     def compute_profile_resources(self):
         """compute memory /cpu resources for a single profile browser"""
diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py
index c33afe1d5..d37d42ea8 100644
--- a/backend/btrixcloud/operator/crawls.py
+++ b/backend/btrixcloud/operator/crawls.py
@@ -61,6 +61,19 @@
 EXEC_TIME_UPDATE_SECS = 60
 
+# scale up if exceeded this threshold of mem usage (eg. 90%)
+MEM_SCALE_UP_THRESHOLD = 0.90
+
+# scale up by this much
+MEM_SCALE_UP = 1.2
+
+# soft OOM if exceeded this threshold of mem usage (eg. 100%)
+MEM_SOFT_OOM_THRESHOLD = 1.0
+
+# set memory limit to this much of request for extra padding
+MEM_LIMIT_PADDING = 1.2
+
+
 # pylint: disable=too-many-public-methods, too-many-locals, too-many-branches, too-many-statements
 # pylint: disable=invalid-name, too-many-lines, too-many-return-statements
 # ============================================================================
@@ -210,7 +223,7 @@ async def sync_crawls(self, data: MCSyncData):
         )
 
         # auto sizing handled here
-        self.handle_auto_size(crawl.id, status.podStatus)
+        await self.handle_auto_size(status.podStatus)
 
         if status.finished:
             return await self.finalize_response(
@@ -326,6 +339,7 @@ def _load_crawler(self, params, i, status, children):
         params["name"] = name
         params["cpu"] = pod_info.newCpu or params.get("crawler_cpu")
         params["memory"] = pod_info.newMemory or params.get("crawler_memory")
+        params["memory_limit"] = float(params["memory"]) * MEM_LIMIT_PADDING
         params["do_restart"] = (
             pod_info.should_restart_pod() or params.get("force_restart")
         ) and has_pod
@@ -1010,18 +1024,28 @@ async def add_used_stats(self, crawl_id, pod_status, redis, metrics):
             pod_info.used.memory = int(parse_quantity(usage["memory"]))
             pod_info.used.cpu = float(parse_quantity(usage["cpu"]))
 
-    def handle_auto_size(self, _, pod_status):
+    async def handle_auto_size(self, pod_status) -> None:
         """auto scale pods here, experimental"""
         for name, pod in pod_status.items():
-            # if pod crashed due to OOM, increase mem
-            # if pod.isNewExit and pod.reason == "oom":
-            #    pod.newMemory = int(float(pod.allocated.memory) * 1.2)
-            #    print(f"Resizing pod {name} -> mem {pod.newMemory} - OOM Detected")
-
-            # if redis is using >0.90 of its memory, increase mem
-            if name.startswith("redis") and pod.get_percent_memory() > 0.90:
-                pod.newMemory = int(float(pod.allocated.memory) * 1.2)
-                print(f"Resizing pod {name} -> mem {pod.newMemory} - Redis Capacity")
+            mem_usage = pod.get_percent_memory()
+            # if pod is using >MEM_SCALE_UP_THRESHOLD of its memory, increase mem
+            if mem_usage > MEM_SCALE_UP_THRESHOLD:
+                pod.newMemory = int(float(pod.allocated.memory) * MEM_SCALE_UP)
+                print(
+                    f"Mem {mem_usage}: Resizing pod {name} -> mem {pod.newMemory} - Scale Up"
+                )
+
+                # if crawler pod is using its OOM threshold, attempt a soft OOM
+                # via a second SIGTERM
+                if mem_usage >= MEM_SOFT_OOM_THRESHOLD and name.startswith("crawl"):
+                    await self.k8s.send_signal_to_pod(name, "SIGTERM")
+
+            # if any pod crashed due to OOM, increase mem
+            elif pod.isNewExit and pod.reason == "oom":
+                pod.newMemory = int(float(pod.allocated.memory) * MEM_SCALE_UP)
+                print(
+                    f"Mem {mem_usage}: Resizing pod {name} -> mem {pod.newMemory} - OOM Detected"
+                )
 
     async def log_crashes(self, crawl_id, pod_status, redis):
         """report/log any pod crashes here"""
diff --git a/chart/app-templates/crawler.yaml b/chart/app-templates/crawler.yaml
index e9ea1834d..ea9f4e33b 100644
--- a/chart/app-templates/crawler.yaml
+++ b/chart/app-templates/crawler.yaml
@@ -175,7 +175,7 @@ spec:
 
     resources:
       limits:
-        memory: "{{ memory }}"
+        memory: "{{ memory_limit }}"
 
       requests:
         cpu: "{{ cpu }}"
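
For a concrete sense of how the new constants interact: a pod requesting 1 GiB
gets a 1.2 GiB hard limit (MEM_LIMIT_PADDING), is resized by MEM_SCALE_UP once
usage passes 90% of the request, and, if it is a crawler pod at or above 100%
of the request, also receives the soft-OOM SIGTERM. The standalone Python
sketch below only illustrates that arithmetic; the helper name, dict layout,
and example values are not part of the patch.

# Illustrative sketch of the threshold arithmetic in this patch (not patch code).
# Constants mirror the values added to crawls.py.
MEM_SCALE_UP_THRESHOLD = 0.90   # resize once usage exceeds 90% of the request
MEM_SCALE_UP = 1.2              # grow the memory request by 1.2x
MEM_SOFT_OOM_THRESHOLD = 1.0    # soft OOM once usage reaches the full request
MEM_LIMIT_PADDING = 1.2         # hard limit is 1.2x the request


def next_actions(requested_bytes: int, used_bytes: int) -> dict:
    """Return the decisions the operator logic above would make for one pod."""
    usage = used_bytes / requested_bytes
    actions = {
        "memory_limit": int(requested_bytes * MEM_LIMIT_PADDING),
        "new_request": None,
        "soft_oom": False,
    }
    if usage > MEM_SCALE_UP_THRESHOLD:
        # over 90%: grow the request so the next reconcile resizes the pod
        actions["new_request"] = int(requested_bytes * MEM_SCALE_UP)
        if usage >= MEM_SOFT_OOM_THRESHOLD:
            # in the patch this branch applies only to crawler pods
            # (name.startswith("crawl")): send SIGTERM for a graceful restart
            actions["soft_oom"] = True
    return actions


# Example: a 1 GiB request using 950 MiB is above the 90% threshold but below
# 100%, so the request grows to ~1.2 GiB and no soft OOM is triggered yet.
print(next_actions(1024**3, 950 * 1024**2))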