-
-
Notifications
You must be signed in to change notification settings - Fork 71
/
mult_url_crawl_with_favicon.test.js
48 lines (44 loc) · 1.5 KB
/
mult_url_crawl_with_favicon.test.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import child_process from "child_process";
import fs from "fs";
// Runs a two-seed crawl in the browsertrix-crawler container. The host
// directory $PWD/test-crawls is mounted at /crawls, so the crawl output lands
// under ./test-crawls for later tests to inspect.
test("ensure multi url crawl run with docker run passes", async () => {
  const crawlCommand =
    'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://webrecorder.net/ --generateWACZ --text --collection advanced --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --pages 2 --limit 2';

  // execSync blocks until the container exits and throws on a non-zero exit
  // status, which is what makes this test fail when the crawl fails.
  child_process.execSync(crawlCommand);

  // WACZ validation is currently disabled. The command it used was:
  //   docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler wacz validate --file collections/advanced/advanced.wacz
});
// Verifies that the page records written by the multi-URL crawl carry the
// expected favicon URL for each seed.
//
// Every seed listed in `expectedFavIcons` must be present among the parsed
// records, so the test fails (instead of silently passing) when a record is
// missing or its `url` does not match the seed.
test("check that the favicon made it into the pages jsonl file", () => {
  const pagesFile = "test-crawls/collections/advanced/pages/pages.jsonl";

  // Keys are the seed URLs exactly as passed via --url to the crawl command
  // in the preceding test; values are the favicon URL expected for that page.
  // NOTE(review): the iana.org expectation was previously compared against
  // "https://iana.org/" (no "www."), which the crawl is never seeded with, so
  // this value had not actually been exercised; confirm it against a live crawl.
  const expectedFavIcons = {
    "https://webrecorder.net/": "https://webrecorder.net/assets/favicon.ico",
    "https://www.iana.org/": "https://www.iana.org/_img/bookmark_icon.ico",
  };

  expect(fs.existsSync(pagesFile)).toBe(true);

  // Read and split the file once. Index 0 is skipped, as before (presumably a
  // header record; confirm against the crawler's pages.jsonl format); the
  // two entries after it are parsed as page records.
  const lines = fs.readFileSync(pagesFile, "utf8").split("\n");
  const pages = lines.slice(1, 3).map((line) => JSON.parse(line));

  // Index by URL so the check does not depend on the order in which the
  // pages were written to the file.
  const favIconByUrl = new Map(
    pages.map((page) => [page.url, page.favIconUrl]),
  );

  for (const [url, favIconUrl] of Object.entries(expectedFavIcons)) {
    // Assert presence first so a missing seed is reported by URL.
    expect([...favIconByUrl.keys()]).toContain(url);
    expect(favIconByUrl.get(url)).toEqual(favIconUrl);
  }
});