-
-
Notifications
You must be signed in to change notification settings - Fork 20
/
util-node.js
93 lines (80 loc) · 2.58 KB
/
util-node.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
const crypto = require('crypto')
const htmlParser = require('html-parser')
const { JSDOM } = require('jsdom')
const { loremIpsum } = require('lorem-ipsum')
// Re-export the `html-truncate` package unchanged as `truncate`.
exports.truncate = require('html-truncate')
/**
* Returns hits per day, given the total `hits` and the `date` of publication.
* @param {number} hits
* @param {Date|number} date
* @return {number}
*/
exports.hitsPerDay = function (hits, date) {
let days = (Date.now() - new Date(date)) / 86400000
days = days || 0.00001 // prevent divide by zero
return Math.round(hits / days)
}
// Element names that sanitizeHTML keeps when the caller does not pass its own
// `elementsWhitelist`.
const defaultElementsWhitelist = [
  // paragraphs and line breaks
  'p',
  'br',
  // inline emphasis
  'strong',
  'b',
  'em',
  'i',
  'u',
  // lists
  'ol',
  'ul',
  'li',
  // headings
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  // generic containers
  'div',
  'span',
  // subscript / superscript
  'sub',
  'sup'
]

// Attribute names that sanitizeHTML keeps when the caller does not pass its own
// `attributesWhitelist`. Empty: no attribute name is whitelisted by default.
const defaultAttributesWhitelist = []
/**
* Sanitize dirty (user-provided) HTML to remove bad html tags. Uses a
* whitelist approach, where only the tags we explicitly allow are kept.
*
* @param {String} html dirty HTML
* @param {Array=} elementsWhitelist elements to keep
* @param {Array=} attributesWhitelist attributes to keep
* @return {String} sanitized HTML
*/
exports.sanitizeHTML = function (html, elementsWhitelist, attributesWhitelist) {
elementsWhitelist || (elementsWhitelist = defaultElementsWhitelist)
attributesWhitelist || (attributesWhitelist = defaultAttributesWhitelist)
const sanitized = htmlParser.sanitize(html, {
elements: function (name) {
return elementsWhitelist.indexOf(name) === -1
},
attributes: function (name) {
return attributesWhitelist.indexOf(name) === -1
},
comments: true,
doctype: true
})
return sanitized
}
exports.randomBytes = function (length, cb) {
if (typeof length === 'function') {
cb = length
length = 20
}
if (!cb) throw new Error('argument cb required')
crypto.randomBytes(length, function (err, buf) {
if (err) return cb(err)
cb(null, buf.toString('hex'))
})
}
exports.convertToPaywallText = function (html, numPreview) {
const { window } = new JSDOM(html)
const { document } = window
const nodes = document.querySelectorAll(
'body > :not(:first-child):not(:nth-child(2))'
)
Array.from(nodes).forEach(function (node) {
const words = node.innerHTML.split(' ').length
const sentences = node.innerHTML.split('. ').length
node.innerHTML = loremIpsum({
count: sentences,
sentenceLowerBound: Math.floor((words / sentences) / 1.5),
sentenceUpperBound: Math.ceil(words / sentences)
})
})
const paywallText = document.querySelector('body').innerHTML
window.close()
return paywallText
}