From 806672555333ea053dcae85a8c311bc97336e1e5 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer <ikreymer@gmail.com>
Date: Sat, 11 Nov 2023 00:50:26 -0800
Subject: [PATCH 1/2] add horizontal pod autoscaler for backend and frontend: -
 use cpu and memory averages - adjust base memory + cpu for backend -
 threshold set to 80% cpu and 95% memory utilization by default (configurable
 in values.yaml) - instead of backend and frontend replicas, set max replicas
 in values.yaml - only enable hpa if backend_max_replicas or
 frontend_max_replicas is >1, default to 1 for now

---
 chart/templates/backend.yaml  | 32 +++++++++++++++++++++++++++++++-
 chart/templates/frontend.yaml | 32 ++++++++++++++++++++++++++++++--
 chart/values.yaml             | 28 ++++++++++++++++++++++++----
 3 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/chart/templates/backend.yaml b/chart/templates/backend.yaml
index dd148d6352..ea9c744183 100644
--- a/chart/templates/backend.yaml
+++ b/chart/templates/backend.yaml
@@ -10,7 +10,9 @@ spec:
     matchLabels:
       app: {{ .Values.name }}
       role: backend
-  replicas: {{ .Values.backend_num_replicas }}
+{{- if eq (int .Values.backend_max_replicas) 1 }}
+  replicas: 1
+{{- end }}
   template:
     metadata:
       labels:
@@ -232,3 +234,31 @@ spec:
     - protocol: TCP
       port: {{ .Values.opPort }}
       name: operator
+
+{{- if gt (int .Values.backend_max_replicas) 1 }}
+---
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: backend-autoscaler
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ .Values.name }}-backend
+  minReplicas: 1
+  maxReplicas: {{ .Values.backend_max_replicas }}
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      target:
+        type: Utilization
+        averageUtilization: {{ .Values.backend_avg_cpu_threshold }}
+  - type: Resource
+    resource:
+      name: memory
+      target:
+        type: Utilization
+        averageUtilization: {{ .Values.backend_avg_memory_threshold }}
+{{- end }}
diff --git a/chart/templates/frontend.yaml b/chart/templates/frontend.yaml
index f3a3d39a23..4527858630 100644
--- a/chart/templates/frontend.yaml
+++ b/chart/templates/frontend.yaml
@@ -10,7 +10,9 @@ spec:
     matchLabels:
       app: {{ .Values.name }}
       role: frontend
-  replicas: {{ .Values.frontend_num_replicas | default 1 }}
+  {{- if eq (int .Values.frontend_max_replicas) 1 }}
+  replicas: 1
+  {{- end }}
   template:
     metadata:
       labels:
@@ -110,4 +112,30 @@ spec:
   {{- end }}
 
 
-
+{{- if gt (int .Values.frontend_max_replicas) 1 }}
+---
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: frontend-autoscaler
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ .Values.name }}-frontend
+  minReplicas: 1
+  maxReplicas: {{ .Values.frontend_max_replicas }}
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      target:
+        type: Utilization
+        averageUtilization: {{ .Values.frontend_avg_cpu_threshold }}
+  - type: Resource
+    resource:
+      name: memory
+      target:
+        type: Utilization
+        averageUtilization: {{ .Values.frontend_avg_memory_threshold }}
+{{ end }}
diff --git a/chart/values.yaml b/chart/values.yaml
index f6c4235724..82c0ec7afc 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -91,13 +91,10 @@ backend_pull_policy: "Always"
 
 backend_password_secret: "PASSWORD!"
 
-# number of backend pods
-backend_num_replicas: 1
-
 # number of workers per pod
 backend_workers: 1
 
-backend_cpu: "25m"
+backend_cpu: "100m"
 
 backend_memory: "350Mi"
 
@@ -114,6 +111,17 @@ profile_browser_idle_seconds: 60
 # mostly intended for debugging / testing
 # log_failed_crawl_lines: 200
 
+# Autoscaling
+# -----------
+# max number of backend pods to scale to
+# if > 1, will enable HPA for backend
+backend_max_replicas: 1
+
+# scale up if avg cpu utilization exceeds
+backend_avg_cpu_threshold: 80
+
+# scale up if avg memory utilization exceeds
+backend_avg_memory_threshold: 95
 
 # Nginx Image
 # =========================================
@@ -132,6 +140,18 @@ local_service_port: 30870
 
 frontend_alias: "http://browsertrix-cloud-frontend"
 
+# Autoscaling
+# -----------
+# max number of frontend pods to scale to
+# if > 1, will enable HPA for frontend
+frontend_max_replicas: 1
+
+# scale up if avg cpu utilization exceeds
+frontend_avg_cpu_threshold: 80
+
+# scale up if avg memory utilization exceeds
+frontend_avg_memory_threshold: 95
+
 
 # MongoDB Image
 # =========================================

From 8ec2e0766fbed857221205f7f23175ecd5eb9321 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer <ikreymer@gmail.com>
Date: Thu, 28 Mar 2024 10:40:22 -0700
Subject: [PATCH 2/2] add docs for hpa

---
 docs/deploy/customization.md | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/docs/deploy/customization.md b/docs/deploy/customization.md
index f145ff7fd3..e50349e97a 100644
--- a/docs/deploy/customization.md
+++ b/docs/deploy/customization.md
@@ -78,6 +78,36 @@ storages:
     endpoint_url: "http://s3provider.example.com"
 ```
 
+## Horizontal Autoscaling
+
+Browsertrix also includes support for horizontal auto-scaling for both the backend and frontend pods.
+The auto-scaling will start a new pod when memory/cpu utilization reaches the thresholds.
+
+To use auto-scaling, the [metrics-server](https://github.com/kubernetes-sigs/metrics-server) cluster add-on is required.
+Many Kubernetes providers include metrics-server by default; others, like MicroK8s, make it available as an add-on.
+
+To enable auto-scaling, set `backend_max_replicas` and/or `frontend_max_replicas` to a value >1.
+
+```yaml
+backend_max_replicas: 2
+
+frontend_max_replicas: 2
+```
+
+The thresholds that the auto-scaling uses for deciding when to start a new pod can also
+be modified. The default values are:
+
+```yaml
+backend_avg_cpu_threshold: 80
+
+backend_avg_memory_threshold: 95
+
+frontend_avg_cpu_threshold: 80
+
+frontend_avg_memory_threshold: 95
+```
+
+
 ## Email / SMTP Server
 
 Browsertrix sends user invitations, password resets, background job failure notifications, and other important messages via email. The `email` setting can be used to configure the SMTP server used to send emails. To avoid email messages from Browsertrix being flagged as spam, be sure to use the same domain for `sender_email` and `reply_to_email`.