From 14d637be22b86d5714fa3b34060784c375f4860f Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Sat, 23 Mar 2024 10:09:14 -0700
Subject: [PATCH] memory limit padding + auto-scale crawler pods:

- set memory limit to 1.2x memory request to provide extra padding and avoid
  OOM
- attempt to resize crawler pods by 1.2 when exceeding 90% of available memory
- do a 'soft OOM' (send extra SIGTERM) to pod when reaching 100% of requested
  memory, resulting in faster graceful restart
---
 backend/btrixcloud/k8sapi.py                | 26 ++++++++++++
 backend/btrixcloud/operator/baseoperator.py | 19 ++++++---
 backend/btrixcloud/operator/crawls.py       | 46 ++++++++++++++++-----
 chart/app-templates/crawler.yaml            |  2 +-
 4 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/backend/btrixcloud/k8sapi.py b/backend/btrixcloud/k8sapi.py
index 578c22692..3ba7f965e 100644
--- a/backend/btrixcloud/k8sapi.py
+++ b/backend/btrixcloud/k8sapi.py
@@ -288,3 +288,29 @@ async def has_custom_jobs_with_label(self, plural, label) -> bool:
         # pylint: disable=broad-exception-caught
         except Exception:
             return False
+
+    async def send_signal_to_pod(self, pod_name, signame) -> bool:
+        """send signal to all pods"""
+        command = ["bash", "-c", f"kill -s {signame} 1"]
+        signaled = False
+
+        try:
+            print(f"Sending {signame} to {pod_name}", flush=True)
+
+            res = await self.core_api_ws.connect_get_namespaced_pod_exec(
+                name=pod_name,
+                namespace=self.namespace,
+                command=command,
+                stdout=True,
+            )
+            if res:
+                print("Result", res, flush=True)
+
+            else:
+                signaled = True
+
+        # pylint: disable=broad-except
+        except Exception as exc:
+            print(f"Send Signal Error: {exc}", flush=True)
+
+        return signaled
diff --git a/backend/btrixcloud/operator/baseoperator.py b/backend/btrixcloud/operator/baseoperator.py
index b06d8bf05..9a3c67db7 100644
--- a/backend/btrixcloud/operator/baseoperator.py
+++ b/backend/btrixcloud/operator/baseoperator.py
@@ -42,28 +42,35 @@ def compute_crawler_resources(self):
         """compute memory / cpu resources for crawlers"""
         p = self.shared_params
         num = max(int(p["crawler_browser_instances"]) - 1, 0)
+        crawler_cpu: float = 0
+        crawler_memory: int = 0
         print("crawler resources")
         if not p.get("crawler_cpu"):
             base = parse_quantity(p["crawler_cpu_base"])
             extra = parse_quantity(p["crawler_extra_cpu_per_browser"])
 
             # cpu is a floating value of cpu cores
-            p["crawler_cpu"] = float(base + num * extra)
+            crawler_cpu = float(base + num * extra)
 
-            print(f"cpu = {base} + {num} * {extra} = {p['crawler_cpu']}")
+            print(f"cpu = {base} + {num} * {extra} = {crawler_cpu}")
         else:
-            print(f"cpu = {p['crawler_cpu']}")
+            crawler_cpu = float(parse_quantity(p["crawler_cpu"]))
+            print(f"cpu = {crawler_cpu}")
 
         if not p.get("crawler_memory"):
             base = parse_quantity(p["crawler_memory_base"])
             extra = parse_quantity(p["crawler_extra_memory_per_browser"])
 
             # memory is always an int
-            p["crawler_memory"] = int(base + num * extra)
+            crawler_memory = int(base + num * extra)
 
-            print(f"memory = {base} + {num} * {extra} = {p['crawler_memory']}")
+            print(f"memory = {base} + {num} * {extra} = {crawler_memory}")
         else:
-            print(f"memory = {p['crawler_memory']}")
+            crawler_memory = int(parse_quantity(p["crawler_memory"]))
+            print(f"memory = {crawler_memory}")
+
+        p["crawler_cpu"] = crawler_cpu
+        p["crawler_memory"] = crawler_memory
 
     def compute_profile_resources(self):
         """compute memory /cpu resources for a single profile browser"""
diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py
index c33afe1d5..d37d42ea8 100644
--- a/backend/btrixcloud/operator/crawls.py
+++ b/backend/btrixcloud/operator/crawls.py
@@ -61,6 +61,19 @@
 EXEC_TIME_UPDATE_SECS = 60
 
+# scale up if exceeded this threshold of mem usage (eg. 90%)
+MEM_SCALE_UP_THRESHOLD = 0.90
+
+# scale up by this much
+MEM_SCALE_UP = 1.2
+
+# soft OOM if exceeded this threshold of mem usage (eg. 100%)
+MEM_SOFT_OOM_THRESHOLD = 1.0
+
+# set memory limit to this much of request for extra padding
+MEM_LIMIT_PADDING = 1.2
+
+
 # pylint: disable=too-many-public-methods, too-many-locals, too-many-branches, too-many-statements
 # pylint: disable=invalid-name, too-many-lines, too-many-return-statements
 # ============================================================================
@@ -210,7 +223,7 @@ async def sync_crawls(self, data: MCSyncData):
         )
 
         # auto sizing handled here
-        self.handle_auto_size(crawl.id, status.podStatus)
+        await self.handle_auto_size(status.podStatus)
 
         if status.finished:
             return await self.finalize_response(
@@ -326,6 +339,7 @@ def _load_crawler(self, params, i, status, children):
         params["name"] = name
         params["cpu"] = pod_info.newCpu or params.get("crawler_cpu")
         params["memory"] = pod_info.newMemory or params.get("crawler_memory")
+        params["memory_limit"] = float(params["memory"]) * MEM_LIMIT_PADDING
         params["do_restart"] = (
             pod_info.should_restart_pod() or params.get("force_restart")
         ) and has_pod
@@ -1010,18 +1024,28 @@ async def add_used_stats(self, crawl_id, pod_status, redis, metrics):
             pod_info.used.memory = int(parse_quantity(usage["memory"]))
             pod_info.used.cpu = float(parse_quantity(usage["cpu"]))
 
-    def handle_auto_size(self, _, pod_status):
+    async def handle_auto_size(self, pod_status) -> None:
         """auto scale pods here, experimental"""
         for name, pod in pod_status.items():
-            # if pod crashed due to OOM, increase mem
-            # if pod.isNewExit and pod.reason == "oom":
-            #    pod.newMemory = int(float(pod.allocated.memory) * 1.2)
-            #    print(f"Resizing pod {name} -> mem {pod.newMemory} - OOM Detected")
-
-            # if redis is using >0.90 of its memory, increase mem
-            if name.startswith("redis") and pod.get_percent_memory() > 0.90:
-                pod.newMemory = int(float(pod.allocated.memory) * 1.2)
-                print(f"Resizing pod {name} -> mem {pod.newMemory} - Redis Capacity")
+            mem_usage = pod.get_percent_memory()
+            # if pod is using >MEM_SCALE_UP_THRESHOLD of its memory, increase mem
+            if mem_usage > MEM_SCALE_UP_THRESHOLD:
+                pod.newMemory = int(float(pod.allocated.memory) * MEM_SCALE_UP)
+                print(
+                    f"Mem {mem_usage}: Resizing pod {name} -> mem {pod.newMemory} - Scale Up"
+                )
+
+                # if crawler pod is using its OOM threshold, attempt a soft OOM
+                # via a second SIGTERM
+                if mem_usage >= MEM_SOFT_OOM_THRESHOLD and name.startswith("crawl"):
+                    await self.k8s.send_signal_to_pod(name, "SIGTERM")
+
+            # if any pod crashed due to OOM, increase mem
+            elif pod.isNewExit and pod.reason == "oom":
+                pod.newMemory = int(float(pod.allocated.memory) * MEM_SCALE_UP)
+                print(
+                    f"Mem {mem_usage}: Resizing pod {name} -> mem {pod.newMemory} - OOM Detected"
+                )
 
     async def log_crashes(self, crawl_id, pod_status, redis):
         """report/log any pod crashes here"""
diff --git a/chart/app-templates/crawler.yaml b/chart/app-templates/crawler.yaml
index e9ea1834d..ea9f4e33b 100644
--- a/chart/app-templates/crawler.yaml
+++ b/chart/app-templates/crawler.yaml
@@ -175,7 +175,7 @@ spec:
 
     resources:
       limits:
-        memory: "{{ memory }}"
+        memory: "{{ memory_limit }}"
 
       requests:
         cpu: "{{ cpu }}"
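
For a concrete sense of how the new constants interact: a pod requesting 1 GiB
gets a 1.2 GiB hard limit (MEM_LIMIT_PADDING), is resized by MEM_SCALE_UP once
usage passes 90% of the request, and, if it is a crawler pod at or above 100%
of the request, also receives the soft-OOM SIGTERM. The standalone Python
sketch below only illustrates that arithmetic; the helper name, dict layout,
and example values are not part of the patch.

# Illustrative sketch of the threshold arithmetic in this patch (not patch code).
# Constants mirror the values added to crawls.py.
MEM_SCALE_UP_THRESHOLD = 0.90   # resize once usage exceeds 90% of the request
MEM_SCALE_UP = 1.2              # grow the memory request by 1.2x
MEM_SOFT_OOM_THRESHOLD = 1.0    # soft OOM once usage reaches the full request
MEM_LIMIT_PADDING = 1.2         # hard limit is 1.2x the request


def next_actions(requested_bytes: int, used_bytes: int) -> dict:
    """Return the decisions the operator logic above would make for one pod."""
    usage = used_bytes / requested_bytes
    actions = {
        "memory_limit": int(requested_bytes * MEM_LIMIT_PADDING),
        "new_request": None,
        "soft_oom": False,
    }
    if usage > MEM_SCALE_UP_THRESHOLD:
        # over 90%: grow the request so the next reconcile resizes the pod
        actions["new_request"] = int(requested_bytes * MEM_SCALE_UP)
        if usage >= MEM_SOFT_OOM_THRESHOLD:
            # in the patch this branch applies only to crawler pods
            # (name.startswith("crawl")): send SIGTERM for a graceful restart
            actions["soft_oom"] = True
    return actions


# Example: a 1 GiB request using 950 MiB is above the 90% threshold but below
# 100%, so the request grows to ~1.2 GiB and no soft OOM is triggered yet.
print(next_actions(1024**3, 950 * 1024**2))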