# Crawler Settings
# =========================================
# default time to run behaviors on each page (in seconds)
default_behavior_time_seconds: 300
# default time to wait for page to fully load before running behaviors (in seconds)
default_page_load_time_seconds: 120
# disk utilization threshold percentage - when used disk space passes
# this percentage of total, crawls will gracefully stop to prevent the
# disk from being filled
# This should be a string so that it can be included in crawler_args
disk_utilization_threshold: 90
# crawler logging flags
crawler_logging_opts: "stats,behaviors,debug"
# to enable, set to a value other than 'false'
crawler_extract_full_text: false
# max pages per crawl
# set to non-zero value to enforce global max pages per crawl limit
# if 0, there is no page limit (may need to adjust crawler/redis settings for larger crawls)
# if set, each workflow can have a lower limit, but not higher
max_pages_per_crawl: 50000
# default template for generated wacz files
# supports the following interpolated vars:
# @ts - current timestamp
# @hostname - full hostname
# @hostsuffix - last 14 characters of hostname
# @id - full crawl id
default_crawl_filename_template: "@ts-@hostsuffix.wacz"
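# for example (illustrative only; assumes @ts expands to a
# YYYYMMDDHHMMSS-style timestamp), a crawler host whose name ends in
# "0-abcdef123456" would produce:
#   "20240115093000-0-abcdef123456.wacz"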
# advanced: additional args to be passed to the crawler
# this is mostly for testing of new/experimental crawler flags
# standard crawler options are covered with other options above
crawler_extra_args: ""
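# for example, ad blocking could be enabled with the following
# (assumes the crawler image in use supports the --blockAds flag):
# crawler_extra_args: "--blockAds"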
# max allowed crawl scale per crawl
max_crawl_scale: 3
# Cluster Settings
# =========================================
name: browsertrix-cloud
# when running in the cloud, set this value to cloud-specific block storage
# keep empty to use hostPath (eg. on minikube)
volume_storage_class:
# if set, set the node selector 'nodeType' for deployment pods
# main_node_type:
# if set, set the node selector 'nodeType' for crawling pods
# crawler_node_type:
registration_enabled: "0"
jwt_token_lifetime_minutes: 1440
# if set to "1", allow inviting same user to same org multiple times
allow_dupe_invites: "0"
# number of seconds before pending invites expire - default is 7 days
invite_expire_seconds: 604800
# base url for replayweb.page
rwp_base_url: "https://cdn.jsdelivr.net/npm/replaywebpage@1.8.12/"
superuser:
  # set this to enable a superuser admin
  email: admin@example.com
  # optional: if not set, automatically generated
  # change or remove this
  password: PASSW0RD!
# Set name for default organization created with superuser
default_org: "My Organization"
# API Image
# =========================================
backend_image: "docker.io/webrecorder/browsertrix-backend:1.10.0-beta.0"
backend_pull_policy: "Always"
backend_password_secret: "PASSWORD!"
# number of backend pods
backend_num_replicas: 1
# number of workers per pod
backend_workers: 1
backend_cpu: "25m"
backend_memory: "350Mi"
# port for operator service
opPort: 8756
job_cpu: "3m"
job_memory: "70Mi"
profile_browser_idle_seconds: 60
# if set, print last 'log_failed_crawl_lines' of each failed
# crawl pod to backend operator stdout
# mostly intended for debugging / testing
# log_failed_crawl_lines: 200
# Nginx Image
# =========================================
frontend_image: "docker.io/webrecorder/browsertrix-frontend:1.10.0-beta.0"
frontend_pull_policy: "Always"
frontend_cpu: "10m"
frontend_memory: "64Mi"
# if set, maps nginx to a fixed port on host machine
# must be between 30000 - 32767
# use for deployments on localhost when not using ingress
# if using ingress, this value is ignored
local_service_port: 30870
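# for example, with the value above and a cluster that exposes NodePorts
# on the host (as Docker Desktop does), the app would be reachable at
# http://localhost:30870/ (illustrative; access depends on your cluster)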
# MongoDB Image
# =========================================
mongo_local: true
mongo_host: "local-mongo.default"
mongo_image: "docker.io/library/mongo:6.0.5"
mongo_pull_policy: "IfNotPresent"
mongo_cpu: "12m"
mongo_memory: "512Mi"
mongo_auth:
  # specify either username + password (for local mongo)
  username: root
  password: PASSWORD!
  # or full URL (for remote mongo server)
  # db_url: mongodb+srv://...
# Redis Image
# =========================================
redis_local: true
redis_image: "redis"
redis_pull_policy: "IfNotPresent"
redis_url: "redis://local-redis.default:6379/1"
redis_cpu: "10m"
redis_memory: "200Mi"
redis_storage: "3Gi"
# Crawler Channels
# =========================================
# Support for additional crawler release channels
# If more than one channel is provided, a dropdown will be shown to users
# 'default' channel must always be included
crawler_channels:
  - id: default
    image: "docker.io/webrecorder/browsertrix-crawler:latest"
  # Add, remove, or edit additional crawler versions below, for example:
  # - id: custom_version
  #   image: "<DOCKER IMAGE>"
crawler_pull_policy: "Always"
crawler_namespace: "crawlers"
# optional: enable to use a persist volume claim for all crawls
# can be enabled to use a multi-write shared filesystem
# crawler_pv_claim: "nfs-shared-crawls"
# num retries
crawl_retries: 1000
# Crawler Resources
# -----------------
# base cpu for 1 browser
crawler_cpu_base: 900m
# base memory for 1 browser
crawler_memory_base: 1024Mi
# number of browsers per crawler instance
crawler_browser_instances: 2
# this value is added to crawler_cpu_base for each additional browser:
# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
crawler_extra_cpu_per_browser: 600m
crawler_extra_memory_per_browser: 768Mi
# if not set, defaults to the following, but can be overridden directly:
# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
# crawler_cpu:
# if not set, defaults to the following, but can be overridden directly:
# crawler_memory = crawler_memory_base + crawler_extra_memory_per_browser * (crawler_browser_instances - 1)
# crawler_memory:
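# for illustration, with the default values above the computed totals
# per crawler pod would be:
#   crawler_cpu    = 900m + 600m * (2 - 1) = 1500m
#   crawler_memory = 1024Mi + 768Mi * (2 - 1) = 1792Mi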
# optional: defaults to crawler_memory_base and crawler_cpu_base if not set
# profile_browser_memory:
#
# profile_browser_cpu:
# Other Crawler Settings
# ----------------------
# minimum size allocated to each crawler
# should be at least double the crawl session size limit, to ensure space for the WACZ
crawler_storage: "22Gi"
# max size (in bytes) at which crawler will commit current crawl session (10000000000 = 10 GB)
crawler_session_size_limit_bytes: "10000000000"
# max time in seconds after which crawler will restart, if set
crawler_session_time_limit_seconds: 18000
crawler_liveness_port: 6065
# optional: use socks5 proxy for crawler and profilebrowser
# crawler_socks_proxy_host: 192.0.2.1
# crawler_socks_proxy_port: 9050
# time to wait for graceful stop
grace_period: 1000
# Local Minio Pod (optional)
# =========================================
# set to true to use a local minio image
minio_local: true
# enable to allow access to minio console via specified port
# minio_local_console_port: 30091
minio_scheme: "http"
minio_host: "local-minio.default:9000"
minio_image: docker.io/minio/minio:RELEASE.2022-10-24T18-35-07Z
minio_mc_image: minio/mc
minio_pull_policy: "IfNotPresent"
minio_local_bucket_name: &local_bucket_name "btrix-data"
minio_cpu: "10m"
minio_memory: "1024Mi"
# Storage
# =========================================
# should include the local minio bucket, if enabled, and any other available buckets for default storage
storages:
  - name: "default"
    type: "s3"
    access_key: "ADMIN"
    secret_key: "PASSW0RD"
    bucket_name: *local_bucket_name
    endpoint_url: "http://local-minio.default:9000/"
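  # additional buckets may be listed alongside the local one, for example
  # (hypothetical bucket name and endpoint):
  # - name: "remote"
  #   type: "s3"
  #   access_key: "<ACCESS_KEY>"
  #   secret_key: "<SECRET_KEY>"
  #   bucket_name: "my-crawl-bucket"
  #   endpoint_url: "https://s3.amazonaws.com/"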
# optional: duration in minutes for WACZ download links to be valid
# used by webhooks and replay
# max value = 10079 (one week minus one minute)
# storage_presign_duration_minutes: 10079
# Email Options
# =========================================
email:
  # email sending is enabled when 'smtp_host' is set to a non-empty value
  # ex: smtp_host: smtp.gmail.com
  smtp_host: ""
  smtp_port: 587
  sender_email: example@example.com
  password: password
  reply_to_email: example@example.com
  use_tls: True
  support_email: support@example.com
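# for example, to send via an external SMTP server (illustrative values;
# any SMTP provider should work):
# email:
#   smtp_host: smtp.gmail.com
#   smtp_port: 587
#   sender_email: crawls@example.com
#   password: <smtp or app password>
#   use_tls: True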
# Deployment options
# =========================================
# Ingress (Optional)
# Optional: if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using letsencrypt)
ingress:
  # host: ""
  cert_email: "test@example.com"
  tls: false
  # Optional: Uncomment to use your own cluster-issuer instead of default ACME https validation
  # custom_cluster_issuer: custom_cluster_issuer-name
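  # for example, a public deployment might set (hypothetical domain):
  # host: "btrix.example.com"
  # cert_email: "admin@example.com"
  # tls: true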
ingress_class: nginx
# Signing Options
# =========================================
# optionally enable signer
signer:
  enabled: false
  image: webrecorder/authsign:0.5.0
  # host: <set to signer domain>
  # cert_email: "test@example.com"
  # image_pull_policy: "IfNotPresent"
  # auth_token: <set to custom value>
signer_cpu: "5m"
signer_memory: "40Mi"
# Optional: configure load balancing annotations
# service:
#   annotations:
#     service.beta.kubernetes.io/aws-load-balancer-internal: "true"
#     helm.sh/resource-policy: keep
# Admin services (see Chart.yaml's dependencies)
# note: see `chart/examples/local-logging.yaml`
addons:
  admin:
    logging: false
# metacontroller:
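# for example, to enable the admin logging services
# (see chart/examples/local-logging.yaml):
# addons:
#   admin:
#     logging: true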