-
-
Notifications
You must be signed in to change notification settings - Fork 71
/
Dockerfile
64 lines (43 loc) · 1.77 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Version used to build the default base-image tag (brave-<version>).
# Override at build time with --build-arg BROWSER_VERSION=...
ARG BROWSER_VERSION=1.64.109
# Full base-image reference; override to build on a different browser base.
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:brave-${BROWSER_VERSION}
# ARGs declared before FROM are only in scope for the FROM line itself.
FROM ${BROWSER_IMAGE_BASE}
# Re-declare the pre-FROM build argument so its value is visible inside this
# build stage (needed for the BROWSER_VERSION environment variable below).
ARG BROWSER_VERSION

# Proxy endpoint and CA certificate locations.
ENV PROXY_HOST=localhost \
    PROXY_PORT=8080 \
    PROXY_CA_URL=http://wsgiprox/download/pem \
    PROXY_CA_FILE=/tmp/proxy-ca.pem

# X display number and screen geometry.
ENV DISPLAY=:99 \
    GEOMETRY=1360x1020x16

# Browser version (taken from the build argument) and browser executable name.
ENV BROWSER_VERSION=${BROWSER_VERSION} \
    BROWSER_BIN=google-chrome

# Miscellaneous runtime settings.
# NOTE(review): VNC_PASS is a default credential stored in the image
# environment and readable by anyone with the image; confirm that deployments
# override it at run time.
ENV OPENSSL_CONF=/app/openssl.conf \
    VNC_PASS=vncpassw0rd! \
    DETACHED_CHILD_PROC=1
# Application directory; relative paths in the following instructions resolve
# against it.
WORKDIR /app

# Copy only the Python requirements file first so the install layer below stays
# cached until requirements.txt changes. COPY is used because none of ADD's
# extra behaviours (archive extraction, remote fetch) are needed.
COPY requirements.txt /app/

# Chain with && so a failed setuptools upgrade aborts the build instead of
# being masked by the exit status of the second command. --no-cache-dir keeps
# pip's download cache out of the image layer.
RUN pip install --no-cache-dir -U setuptools && \
    pip install --no-cache-dir -r requirements.txt
# Node package manifest. COPY (not ADD) because this is a plain local file.
COPY package.json /app/

# to allow forcing rebuilds from this stage:
# passing a new value with --build-arg REBUILD=<anything> invalidates the build
# cache for the RUN instructions that follow.
ARG REBUILD

# Prefetch tldextract so pywb is able to boot in environments with limited internet access
RUN tldextract --update
# Download the StevenBlack hosts file and convert its "0.0.0.0 <host>" entries
# into a JSON array of host names written to /app/ad-hosts.json.
#  - curl --fail makes an HTTP error abort the build instead of letting an
#    error page be converted into a (bogus) blocklist.
#  - awk compares the fields as exact strings, so only real "0.0.0.0" entries
#    are selected and only the literal "0.0.0.0" target is excluded (the
#    previous grep patterns treated each "." as a wildcard).
#  - jq drops empty strings so the trailing newline of the input does not
#    produce an empty host entry.
# The downloaded text file is removed in the same layer that created it.
RUN mkdir -p /tmp/ads && \
    curl -fsSL -o /tmp/ads/ad-hosts.txt https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && \
    awk '$1 == "0.0.0.0" && $2 != "0.0.0.0" { print $2 }' /tmp/ads/ad-hosts.txt | \
      jq --raw-input --slurp 'split("\n") | map(select(length > 0))' > /app/ad-hosts.json && \
    rm /tmp/ads/ad-hosts.txt
# Install the Node dependencies declared in /app/package.json, with a very
# large network timeout so slow registries do not abort the build.
RUN yarn install --network-timeout 1000000

# TypeScript configuration and sources. COPY (not ADD): plain local
# file/directory, no archive extraction or remote fetch involved.
COPY tsconfig.json /app/
COPY src /app/src

# Compile the TypeScript sources.
# NOTE(review): later steps reference /app/dist/*.js, so the compiler output
# presumably lands in /app/dist; confirm against tsconfig.json.
RUN yarn run tsc
# Static configuration (copied into /app itself) and HTML assets.
# COPY (not ADD): plain local directories.
COPY config/ /app/
COPY html/ /app/html/

# Make the two compiled entry scripts executable and expose them on PATH as
# `crawl` and `create-login-profile`. The commands are chained with && so that
# a failure of any one of them fails the build (the previous `;` separator
# reported only the status of the last command).
RUN chmod a+x /app/dist/main.js /app/dist/create-login-profile.js && \
    ln -s /app/dist/main.js /usr/bin/crawl && \
    ln -s /app/dist/create-login-profile.js /usr/bin/create-login-profile
# Default working directory of the running container.
WORKDIR /crawls

# enable to test custom behaviors build (from browsertrix-behaviors)
# COPY behaviors.js /app/node_modules/browsertrix-behaviors/dist/behaviors.js

# Entrypoint wrapper script. COPY (not ADD): plain local file; its file mode is
# taken from the build context.
COPY docker-entrypoint.sh /docker-entrypoint.sh

# Exec-form entrypoint; CMD supplies the default argument (`crawl`, the symlink
# created in /usr/bin) and can be replaced on the `docker run` command line.
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["crawl"]