Skip to content

Commit

Permalink
Try to fix getting WACZs from local minio
Browse files Browse the repository at this point in the history
  • Loading branch information
tw4l committed Mar 14, 2024
1 parent ee764dc commit ac81831
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 2 deletions.
7 changes: 5 additions & 2 deletions backend/btrixcloud/storages.py
Expand Up @@ -77,6 +77,7 @@ def __init__(self, org_ops, crawl_manager) -> None:
self.crawl_manager = crawl_manager

self.is_local_minio = is_bool(os.environ.get("IS_LOCAL_MINIO"))
self.host_from_k8s = os.environ.get("HOST_FROM_K8S")

with open(os.environ["STORAGES_JSON"], encoding="utf-8") as fh:
storage_list = json.loads(fh.read())
Expand Down Expand Up @@ -597,7 +598,8 @@ def organize_based_on_instance_number(
for wacz_file in instance_list:
wacz_url = wacz_file.path
if wacz_url.startswith("/data"):
wacz_url = f"http://host.docker.internal:30870{wacz_url}"
wacz_url = f"{self.host_from_k8s}{wacz_url}"
print(f"WACZ URL: {wacz_url}", flush=True)

with RemoteZip(wacz_url) as remote_zip:
log_files: List[ZipInfo] = [
Expand Down Expand Up @@ -645,7 +647,8 @@ def stream_page_lines(
for wacz_file in wacz_files:
wacz_url = wacz_file.path
if wacz_url.startswith("/data"):
wacz_url = f"http://host.docker.internal:30870{wacz_url}"
wacz_url = f"{self.host_from_k8s}{wacz_url}"
print(f"WACZ URL: {wacz_url}", flush=True)

with RemoteZip(wacz_url) as remote_zip:
page_files: List[ZipInfo] = [
Expand Down
2 changes: 2 additions & 0 deletions chart/templates/configmap.yaml
Expand Up @@ -8,6 +8,8 @@ metadata:
data:
APP_ORIGIN: {{ .Values.ingress.tls | ternary "https" "http" }}://{{ .Values.ingress.host | default "localhost:9870" }}

HOST_FROM_K8S: {{ .Values.host_from_k8s | default "http://host.docker.internal:30870" }}

CRAWLER_NAMESPACE: {{ .Values.crawler_namespace }}

DEFAULT_NAMESPACE: {{ .Release.Namespace }}
Expand Down
2 changes: 2 additions & 0 deletions chart/test/microk8s-ci.yaml
Expand Up @@ -8,6 +8,8 @@ frontend_image: "localhost:32000/webrecorder/browsertrix-frontend:latest"
backend_pull_policy: "IfNotPresent"
frontend_pull_policy: "IfNotPresent"

host_from_k8s: http://10.0.1.1:30870

# for testing only
crawler_extra_cpu_per_browser: 300m

Expand Down
2 changes: 2 additions & 0 deletions chart/test/test.yaml
Expand Up @@ -12,6 +12,8 @@ default_crawl_filename_template: "@ts-testing-@hostsuffix.wacz"

operator_resync_seconds: 3

host_from_k8s: http://host.k3d.internal:30870

# for testing only
crawler_extra_cpu_per_browser: 300m

Expand Down
7 changes: 7 additions & 0 deletions chart/values.yaml
Expand Up @@ -130,6 +130,13 @@ frontend_memory: "64Mi"
# if using ingress, this value is ignored
local_service_port: 30870

# Host URL (scheme, host, and port) as reachable from within k8s
# Default is for Docker Desktop
# k3d: http://host.k3d.internal:30870
# microk8s: http://10.0.1.1:30870
# Port must match local_service_port
host_from_k8s: http://host.docker.internal:30870


# MongoDB Image
# =========================================
Expand Down

0 comments on commit ac81831

Please sign in to comment.