Skip to content

Commit

Permalink
Always fail crawl if failOnFailedSeed is set and a seed returns a 4xx/5xx response
Browse files Browse the repository at this point in the history
  • Loading branch information
tw4l committed May 6, 2024
1 parent ddc3e10 commit 29903e3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
6 changes: 5 additions & 1 deletion src/crawler.ts
Expand Up @@ -1727,8 +1727,12 @@ self.__bx_behaviors.selectMainBehavior();

let failed = isChromeError;

- if (this.params.failOnInvalidStatus && status >= 400) {
+ if (
+ (this.params.failOnInvalidStatus || failCrawlOnError) &&
+ status >= 400
+ ) {
  // Handle 4xx or 5xx response as a page load error
+ // Always fail seeds on 4xx or 5xx response if failOnFailedSeed is set
failed = true;
}

Expand Down
6 changes: 3 additions & 3 deletions tests/seeds.test.js
Expand Up @@ -7,7 +7,7 @@ test("ensure one invalid seed doesn't end crawl if failOnFailedSeed is not set",
let passed = true;
try {
await exec(
- "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url example.com/invalid-seed --generateWACZ --limit 1 --collection invalidseed",
+ "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://example.invalid --generateWACZ --limit 1 --collection invalidseed",
);
} catch (error) {
console.log(error);
Expand All @@ -20,7 +20,7 @@ test("ensure one invalid seed fails crawl if failOnFailedSeed is set", async ()
let passed = true;
try {
await exec(
- "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url example.com/invalid-seed --generateWACZ --limit 1 --failOnFailedSeed --collection failseed",
+ "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://example.invalid --generateWACZ --limit 1 --failOnFailedSeed --collection failseed",
);
} catch (error) {
passed = false;
Expand All @@ -32,7 +32,7 @@ test("ensure crawl fails if no valid seeds are passed", async () => {
let passed = true;
try {
await exec(
- "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url iana.org/ --url example.com/invalid-seed --generateWACZ --limit 1 --collection allinvalidseeds",
+ "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url iana.org/ --url https://example.invalid --generateWACZ --limit 1 --collection allinvalidseeds",
);
} catch (error) {
passed = false;
Expand Down

0 comments on commit 29903e3

Please sign in to comment.