From 8898416027a885102a95245f5426616d290d021c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 22 Feb 2024 16:14:25 -0800 Subject: [PATCH 1/3] warc: add Network.resourceType (https://chromedevtools.github.io/devtools-protocol/tot/Network/#type-ResourceType) as WARC-Resource-Type header for response/request pairs fixes #451 --- src/util/recorder.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/util/recorder.ts b/src/util/recorder.ts index aa8adca82..cfba155d9 100644 --- a/src/util/recorder.ts +++ b/src/util/recorder.ts @@ -1444,6 +1444,10 @@ function createResponse( "WARC-Page-ID": pageid, }; + if (reqresp.resourceType) { + warcHeaders["WARC-Resource-Type"] = reqresp.resourceType; + } + if (!contentIter) { contentIter = [reqresp.payload] as Iterable; } @@ -1492,6 +1496,10 @@ function createRequest( "WARC-Page-ID": pageid, }; + if (reqresp.resourceType) { + warcHeaders["WARC-Resource-Type"] = reqresp.resourceType; + } + const date = responseRecord.warcDate || undefined; return WARCRecord.create( From a02fe8a573c86b7ab2c14925b378365640497651 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 4 Mar 2024 17:18:44 -0800 Subject: [PATCH 2/3] use lowercase string for resourceType to match puppeteer/playwright usage --- src/util/recorder.ts | 6 ++++-- src/util/reqresp.ts | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/util/recorder.ts b/src/util/recorder.ts index cfba155d9..6765b0777 100644 --- a/src/util/recorder.ts +++ b/src/util/recorder.ts @@ -203,7 +203,7 @@ export class Recorder { }); const reqresp = this.pendingReqResp(params.requestId, true); if (reqresp) { - reqresp.resourceType = params.this.addPageRecord(reqresp); + this.addPageRecord(reqresp); this.removeReqResp(params.requestId); } @@ -333,7 +333,9 @@ export class Recorder { } const { url } = reqresp; - reqresp.resourceType = type; + if (type) { + reqresp.resourceType = type.toLowerCase(); + } switch (errorText) { case "net::ERR_BLOCKED_BY_CLIENT": diff --git a/src/util/reqresp.ts b/src/util/reqresp.ts index 6e48f0d02..fbcd9a4ab 100644 --- a/src/util/reqresp.ts +++ b/src/util/reqresp.ts @@ -85,7 +85,10 @@ export class RequestResponseInfo { this.responseHeadersList = params.responseHeaders; this.fetch = true; - this.resourceType = params.resourceType; + + if (params.resourceType) { + this.resourceType = params.resourceType.toLowerCase(); + } this.frameId = params.frameId; } @@ -109,7 +112,7 @@ export class RequestResponseInfo { this.protocol = response.protocol; if (type) { - this.resourceType = type; + this.resourceType = type.toLowerCase(); } if (response.requestHeaders) { From 58ee1671989708227b9f28e2c806afdf30130016 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 4 Mar 2024 17:30:04 -0800 Subject: [PATCH 3/3] fix tests to lowercase --- tests/pageinfo-records.test.js | 50 +++++++++++++++++----------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/pageinfo-records.test.js b/tests/pageinfo-records.test.js index 4566601d7..8642190c7 100644 --- a/tests/pageinfo-records.test.js +++ b/tests/pageinfo-records.test.js @@ -56,68 +56,68 @@ function validateResourcesIndex(json) { "https://webrecorder.net/": { status: 200, mime: "text/html", - type: "Document", + type: "document", }, "https://webrecorder.net/assets/fontawesome/all.css": { status: 200, mime: "text/css", - type: "Stylesheet", + type: "stylesheet", }, "https://webrecorder.net/assets/wr-logo.svg": { status: 200, mime: "image/svg+xml", - type: "Image", + type: "image", }, "https://webrecorder.net/assets/tools/awp-icon.png": { status: 200, mime: "image/png", - type: "Image", + type: "image", }, "https://webrecorder.net/assets/tools/logo-pywb.png": { status: 200, mime: "image/png", - type: "Image", + type: "image", }, "https://webrecorder.net/assets/tools/browsertrixcrawler.png": { status: 200, mime: "image/png", - type: "Image", + type: "image", }, "https://webrecorder.net/assets/tools/rwp-icon.png": { status: 200, mime: "image/png", - type: "Image", + type: "image", }, "https://webrecorder.net/assets/images/btrix-cloud.png": { status: 200, mime: "image/png", - type: "Image", + type: "image", }, "https://webrecorder.net/assets/main.css": { status: 200, mime: "text/css", - type: "Stylesheet", + type: "stylesheet", }, "https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap": - { status: 200, mime: "text/css", type: "Stylesheet" }, + { status: 200, mime: "text/css", type: "stylesheet" }, "https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap": - { status: 200, mime: "text/css", type: "Stylesheet" }, + { status: 200, mime: "text/css", type: "stylesheet" }, "https://stats.browsertrix.com/js/script.js": { status: 200, mime: "application/javascript", - type: "Script", + type: "script", }, "https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2": - { status: 200, mime: "font/woff2", type: "Font" }, + { status: 200, mime: "font/woff2", type: "font" }, "https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2": - { status: 200, mime: "font/woff2", type: "Font" }, + { status: 200, mime: "font/woff2", type: "font" }, "https://webrecorder.net/assets/favicon.ico": { status: 200, mime: "image/vnd.microsoft.icon", - type: "Other", + type: "other", }, "https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2F&d=webrecorder.net": - { status: 202, mime: "text/plain", type: "XHR" }, + { status: 202, mime: "text/plain", type: "xhr" }, }); } @@ -131,35 +131,35 @@ function validateResourcesAbout(json) { "https://webrecorder.net/about": { status: 200, mime: "text/html", - type: "Document", + type: "document", }, "https://webrecorder.net/assets/main.css": { status: 200, mime: "text/css", - type: "Stylesheet", + type: "stylesheet", }, "https://webrecorder.net/assets/fontawesome/all.css": { status: 200, mime: "text/css", - type: "Stylesheet", + type: "stylesheet", }, "https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap": - { status: 200, mime: "text/css", type: "Stylesheet" }, + { status: 200, mime: "text/css", type: "stylesheet" }, "https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap": - { status: 200, mime: "text/css", type: "Stylesheet" }, + { status: 200, mime: "text/css", type: "stylesheet" }, "https://stats.browsertrix.com/js/script.js": { status: 200, mime: "application/javascript", - type: "Script", + type: "script", }, "https://webrecorder.net/assets/wr-logo.svg": { status: 200, mime: "image/svg+xml", - type: "Image", + type: "image", }, "https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2": - { status: 200, mime: "font/woff2", type: "Font" }, + { status: 200, mime: "font/woff2", type: "font" }, "https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2": - { status: 200, mime: "font/woff2", type: "Font" }, + { status: 200, mime: "font/woff2", type: "font" }, }); }