Skip to content

Commit

Permalink
Remove ads and cookie banners from HTML snapshots (#695)
Browse files Browse the repository at this point in the history
* integrate ublock with single-file

* reuse chromium profile
  • Loading branch information
sissbruecker committed Apr 14, 2024
1 parent 22a1fc8 commit 25470ed
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 10 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,6 @@ typings/
/tmp
# Database file
/data
# ublock + chromium
/uBlock0.chromium
/chromium-profile
7 changes: 4 additions & 3 deletions bookmarks/services/singlefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ class SingeFileError(Exception):

def create_snapshot(url: str, filepath: str):
singlefile_path = settings.LD_SINGLEFILE_PATH
# parse string to list of arguments
singlefile_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
# parse options to list of arguments
ublock_options = shlex.split(settings.LD_SINGLEFILE_UBLOCK_OPTIONS)
custom_options = shlex.split(settings.LD_SINGLEFILE_OPTIONS)
temp_filepath = filepath + ".tmp"
# concat lists
args = [singlefile_path] + singlefile_options + [url, temp_filepath]
args = [singlefile_path] + ublock_options + custom_options + [url, temp_filepath]
try:
# Use start_new_session=True to create a new process group
process = subprocess.Popen(args, start_new_session=True)
Expand Down
14 changes: 11 additions & 3 deletions bookmarks/tests/test_singlefile_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ def test_create_snapshot_empty_options(self):

expected_args = [
"single-file",
'--browser-arg="--headless=new"',
'--browser-arg="--user-data-dir=./chromium-profile"',
'--browser-arg="--no-sandbox"',
'--browser-arg="--load-extension=uBlock0.chromium"',
"http://example.com",
self.html_filepath + ".tmp",
]
Expand All @@ -79,6 +83,10 @@ def test_create_snapshot_custom_options(self):

expected_args = [
"single-file",
'--browser-arg="--headless=new"',
'--browser-arg="--user-data-dir=./chromium-profile"',
'--browser-arg="--no-sandbox"',
'--browser-arg="--load-extension=uBlock0.chromium"',
"--some-option",
"some value",
"--another-option",
Expand All @@ -97,9 +105,9 @@ def test_create_snapshot_default_timeout_setting(self):
with mock.patch("subprocess.Popen", return_value=mock_process):
singlefile.create_snapshot("http://example.com", self.html_filepath)

mock_process.wait.assert_called_with(timeout=60)
mock_process.wait.assert_called_with(timeout=120)

@override_settings(LD_SINGLEFILE_TIMEOUT_SEC=120)
@override_settings(LD_SINGLEFILE_TIMEOUT_SEC=180)
def test_create_snapshot_custom_timeout_setting(self):
mock_process = mock.Mock()
mock_process.wait.return_value = 0
Expand All @@ -108,4 +116,4 @@ def test_create_snapshot_custom_timeout_setting(self):
with mock.patch("subprocess.Popen", return_value=mock_process):
singlefile.create_snapshot("http://example.com", self.html_filepath)

mock_process.wait.assert_called_with(timeout=120)
mock_process.wait.assert_called_with(timeout=180)
5 changes: 4 additions & 1 deletion bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ mkdir -p data
mkdir -p data/favicons
# Create assets folder if it does not exist
mkdir -p data/assets
# Create chromium profile folder if it does not exist
mkdir -p chromium-profile

# Generate secret key file if it does not exist
python manage.py generate_secret_key
Expand All @@ -21,8 +23,9 @@ python manage.py create_initial_superuser
# Migrate legacy background tasks to Huey
python manage.py migrate_tasks

# Ensure the DB folder is owned by the right user
# Ensure folders are owned by the right user
chown -R www-data: /etc/linkding/data
chown -R www-data: /etc/linkding/chromium-profile

# Start background task processor using supervisord, unless explicitly disabled
if [ "$LD_DISABLE_BACKGROUND_TASKS" != "True" ]; then
Expand Down
21 changes: 20 additions & 1 deletion docker/alpine.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,29 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||
CMD ["./bootstrap.sh"]


FROM node:18-alpine AS ublock-build
WORKDIR /etc/linkding
# Install necessary tools
RUN apk add --no-cache curl jq unzip
# Fetch the latest release tag
# Download the library
# Unzip the library
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
curl -L -o uBlock0.zip $DOWNLOAD_URL && \
unzip uBlock0.zip
# Patch assets.json to enable easylist-cookies by default
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json && \
mv temp.json ./uBlock0.chromium/assets/assets.json


FROM linkding AS linkding-plus
# install node, chromium
RUN apk update && apk add nodejs npm chromium
# install single-file from fork for now, which contains several hotfixes
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
# copy uBlock0
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
# enable snapshot support
ENV LD_ENABLE_SNAPSHOTS=True
22 changes: 21 additions & 1 deletion docker/default.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,24 @@ CMD curl -f http://localhost:${LD_SERVER_PORT:-9090}/${LD_CONTEXT_PATH}health ||

CMD ["./bootstrap.sh"]


FROM node:18-alpine AS ublock-build
WORKDIR /etc/linkding
# Install necessary tools
RUN apk add --no-cache curl jq unzip
# Fetch the latest release tag
# Download the library
# Unzip the library
RUN TAG=$(curl -sL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name') && \
DOWNLOAD_URL=https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip && \
curl -L -o uBlock0.zip $DOWNLOAD_URL && \
unzip uBlock0.zip
# Patch assets.json to enable easylist-cookies by default
RUN curl -L -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
RUN jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json && \
mv temp.json ./uBlock0.chromium/assets/assets.json


FROM linkding AS linkding-plus
# install chromium
RUN apt-get update && apt-get -y install chromium
Expand All @@ -106,6 +124,8 @@ RUN apt-get install -y gnupg2 apt-transport-https ca-certificates && \
echo "deb [signed-by=/usr/share/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
apt-get update && apt-get install -y nodejs
# install single-file from fork for now, which contains several hotfixes
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/f3730995a52f27d5041a1ad9e7528af4b6b4cf4b
RUN npm install -g https://github.com/sissbruecker/single-file-cli/tarball/4c54b3bc704cfb3e96cec2d24854caca3df0b3b6
# copy uBlock0
COPY --from=ublock-build /etc/linkding/uBlock0.chromium uBlock0.chromium/
# enable snapshot support
ENV LD_ENABLE_SNAPSHOTS=True
13 changes: 13 additions & 0 deletions scripts/setup-ublock.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Download the latest uBlock Origin release for Chromium into ./uBlock0.chromium,
# patch it so the EasyList cookie filter list is enabled by default, and create
# the ./chromium-profile folder.
# Requires curl, jq and unzip; operates on the current working directory.

# Abort on the first failing command or unset variable, so a failed download
# cannot result in a half-patched extension folder.
set -eu

# Remove any previous copy. The name must match the folder created by unzip
# exactly (capital "B"), otherwise stale files survive on case-sensitive
# filesystems and unzip stops to ask about overwriting them.
rm -rf uBlock0.chromium

# Resolve the tag of the latest release. If curl fails, jq receives no input and
# prints nothing; if the response has no tag_name, jq prints "null".
TAG=$(curl -fsSL https://api.github.com/repos/gorhill/uBlock/releases/latest | jq -r '.tag_name')
if [ -z "$TAG" ] || [ "$TAG" = "null" ]; then
  echo "Could not determine the latest uBlock release tag" >&2
  exit 1
fi

# Download and unpack the release archive. -f makes curl fail on HTTP errors
# instead of saving the error page as the zip file.
DOWNLOAD_URL="https://github.com/gorhill/uBlock/releases/download/$TAG/uBlock0_$TAG.chromium.zip"
curl -fL -o uBlock0.zip "$DOWNLOAD_URL"
unzip uBlock0.zip
rm uBlock0.zip

# Bundle the cookie filter list with the extension, then patch assets.json:
# drop the "off" flag from the fanboy-cookiemonster entry and add the bundled
# file to its contentURL list.
curl -fL -o ./uBlock0.chromium/assets/thirdparties/easylist/easylist-cookies.txt https://ublockorigin.github.io/uAssets/thirdparties/easylist-cookies.txt
jq '."fanboy-cookiemonster" |= del(.off) | ."fanboy-cookiemonster".contentURL += ["assets/thirdparties/easylist/easylist-cookies.txt"]' ./uBlock0.chromium/assets/assets.json > temp.json
mv temp.json ./uBlock0.chromium/assets/assets.json

# Create the chromium profile folder if it does not exist
mkdir -p chromium-profile
14 changes: 13 additions & 1 deletion siteroot/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import json
import os
import shlex

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
Expand Down Expand Up @@ -294,8 +295,19 @@
"1",
)
LD_SINGLEFILE_PATH = os.getenv("LD_SINGLEFILE_PATH", "single-file")
LD_SINGLEFILE_UBLOCK_OPTIONS = os.getenv(
"LD_SINGLEFILE_UBLOCK_OPTIONS",
shlex.join(
[
'--browser-arg="--headless=new"',
'--browser-arg="--user-data-dir=./chromium-profile"',
'--browser-arg="--no-sandbox"',
'--browser-arg="--load-extension=uBlock0.chromium"',
]
),
)
LD_SINGLEFILE_OPTIONS = os.getenv("LD_SINGLEFILE_OPTIONS", "")
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 60))
LD_SINGLEFILE_TIMEOUT_SEC = float(os.getenv("LD_SINGLEFILE_TIMEOUT_SEC", 120))

# Monolith isn't used at the moment, as the local snapshot implementation
# switched to single-file after the prototype. Keeping this around in case
Expand Down

0 comments on commit 25470ed

Please sign in to comment.