/
index.js
82 lines (73 loc) · 2.68 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
const fp = require("fastify-plugin");
const { JSDOM } = require("jsdom");
const { tidy } = require("htmltidy2");
const util = require("util");
const tags = require("language-tags");
// Promise-returning wrapper around htmltidy2's callback-style `tidy` function
const tidyP = util.promisify(tidy);
/**
 * @author Frazer Smith
 * @description Decorator plugin that adds a `tidyHtml` function to the Fastify
 * instance; the function uses HTMLTidy2 to parse and tidy the HTML passed.
 * @param {object} server - Fastify instance.
 */
async function plugin(server) {
    /**
     * @description Sets the document language on the `<html>` element, optionally
     * blanks the `alt` attribute of every `<img>` element, then runs the
     * serialized document through HTMLTidy2.
     * @param {string} html - Valid HTML.
     * @param {object} options - Function config values.
     * @param {string=} options.language - Set `lang` and `xml:lang` attributes of `<html>` tag.
     * Defaults to `en` if not set.
     * @param {boolean=} options.removeAlt - Set `alt` attributes in `<img>` tags to empty string if set to `true`.
     * Useful for sending to clinical systems where img tags are stripped from received documents
     * (i.e. TPP's SystmOne).
     * @returns {Promise<string>} Promise that resolves with the tidied HTML.
     * @throws {Error} Error created by `server.httpErrors.badRequest()` if
     * `options.language` is not a valid IANA language tag.
     */
    async function tidyHtml(html, options = {}) {
        // `||` rather than `??` so that an empty string also falls back to "en"
        const language = options?.language || "en";

        // Validate before building the DOM so that an invalid tag is rejected
        // without paying for a full parse of the document
        if (!tags.check(language)) {
            throw server.httpErrors.badRequest(
                "querystring.language not a valid IANA language tag"
            );
        }

        const dom = new JSDOM(html);
        const { document } = dom.window;

        // Set document language on the root <html> element
        const htmlElement = document.querySelector("html");
        htmlElement.setAttribute("lang", language);
        htmlElement.setAttribute("xml:lang", language);

        // Blank (rather than remove) the alt attribute of every img tag
        if (options?.removeAlt === true) {
            document.querySelectorAll("img").forEach((image) => {
                image.setAttribute("alt", "");
            });
        }

        /**
         * Refer to http://api.html-tidy.org/tidy/tidylib_api_5.6.0/tidy_quickref.html for tidy options
         *
         * The following options have been turned on:
         * - bare (remove Microsoft specific HTML and replace non-breaking spaces with spaces)
         * - clean (replace legacy HTML tags)
         * - dropProprietaryAttributes (remove proprietary attributes, such as Microsoft data binding attributes)
         * - escapeCdata (convert <![CDATA[]]> sections to normal text)
         * - hideComments (do not print HTML comment tags)
         * - sortAttributes (sort attributes in element in ascending alphabetic sort)
         */
        const config = {
            bare: true,
            clean: true,
            dropProprietaryAttributes: true,
            escapeCdata: true,
            hideComments: true,
            sortAttributes: "alpha",
        };

        return tidyP(dom.serialize(), config);
    }

    server.decorate("tidyHtml", tidyHtml);
}
// Metadata handed to fastify-plugin alongside the plugin function: the Fastify
// version range, the plugin's name, and the plugins listed as its dependencies
const pluginMeta = {
    fastify: "3.x",
    name: "tidy-html",
    dependencies: ["fastify-sensible"],
};

module.exports = fp(plugin, pluginMeta);