Skip to content

Commit

Permalink
Fix XSS vulnerability in links and changed I18N.md() to understand…
Browse files Browse the repository at this point in the history
… recursive tags #281 #600 #601
  • Loading branch information
Jan Henning Thorsen committed Dec 31, 2021
1 parent 5c7a2de commit 86b2193
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 99 deletions.
8 changes: 7 additions & 1 deletion Changes
@@ -1,9 +1,15 @@
Revision history for perl distribution Convos

6.49 Not Released
- Replaced CONVOS_DEBUG=1 with CONVOS_LOG_LEVEL=trace
- Fix XSS vulnerability in links
Contributor: Pocas
Reference: https://huntr.dev/bounties/4532a0ac-4e7c-4fcf-9fe3-630e132325c0
- Add EXPERIMENTAL support for IRC color and text formatting #281 #600
- Changed to hiding "https://" from generated links, but keep "http://"
- Changed I18N.md() to understand recursive tags #601
- Updated Italian translation #654
Contributor: SerHack
- Replaced CONVOS_DEBUG=1 with CONVOS_LOG_LEVEL=trace

6.48 2021-12-29T19:40:00+0900
- Fix catching invalid nick change #628
Expand Down
107 changes: 73 additions & 34 deletions __tests__/store/I18N.js
Expand Up @@ -19,11 +19,22 @@ test('lmd()', () => {

// Entities should not be translated into "undefined"
expect(i18n.lmd('https://commons.wikimedia.org/wiki/File:HK_WCD_WC_%E7%81%A3%E4%BB%94_Wan_Chai_%E8%BB%92%E5%B0%BC%E8%A9%A9%E9%81%93_Hennessy_Road_tram_body_ads_Tsingtao_Brewery_August_2021_SS2.jpg'))
.toBe('<a href="https://commons.wikimedia.org/wiki/File:HK_WCD_WC_%E7%81%A3%E4%BB%94_Wan_Chai_%E8%BB%92%E5%B0%BC%E8%A9%A9%E9%81%93_Hennessy_Road_tram_body_ads_Tsingtao_Brewery_August_2021_SS2.jpg" target="_blank">https://commons.wikimedia.org/wiki/File:HK_WCD_WC_%E7%81%A3%E4%BB%94_Wan_Chai_%E8%BB%92%E5%B0%BC%E8%A9%A9%E9%81%93_Hennessy_Road_tram_body_ads_Tsingtao_Brewery_August_2021_SS2.jpg</a>');
.toBe('<a href="https://commons.wikimedia.org/wiki/File:HK_WCD_WC_%E7%81%A3%E4%BB%94_Wan_Chai_%E8%BB%92%E5%B0%BC%E8%A9%A9%E9%81%93_Hennessy_Road_tram_body_ads_Tsingtao_Brewery_August_2021_SS2.jpg" target="_blank">commons.wikimedia.org/wiki/File:HK_WCD_WC_%E7%81%A3%E4%BB%94_Wan_Chai_%E8%BB%92%E5%B0%BC%E8%A9%A9%E9%81%93_Hennessy_Road_tram_body_ads_Tsingtao_Brewery_August_2021_SS2.jpg</a>');
});

test('md - not lmd', () => {
expect(i18n.md('`code` %1 *is* cool.')).toBe('<code>code</code> %1 <em>is</em> cool.');
test('md - raw', () => {
expect(i18n.md('> [not a link](https://convos.chat) `<a href="#cool" onclick=""></a>`', {raw: true}))
.toBe('&gt; [not a link](https://convos.chat) `&lt;a href=&quot;#cool&quot; onclick=&quot;&quot;&gt;&lt;/a&gt;`');
});

test('md - whitespace', () => {
expect(i18n.md('')).toBe('&nbsp;');
expect(i18n.md('', {})).toBe('&nbsp;');
expect(i18n.md('', {raw: true})).toBe('&nbsp;');
expect(i18n.md(' f b a r ', {raw: true})).toBe('&nbsp;f &nbsp; b a &nbsp; &nbsp; &nbsp;r&nbsp;');
expect(i18n.md('')).toBe('&nbsp;');
expect(i18n.md(' ___ ___ _ ___ _____ ___'))
.toBe('&nbsp; &nbsp; ___ ___ &nbsp;_ &nbsp;___ &nbsp; _____ &nbsp;___');
});

test('md - unchanged', () => {
Expand All @@ -32,7 +43,51 @@ test('md - unchanged', () => {
});

test('md - blockquote', () => {
expect(i18n.md('> Some quote')).toBe('<blockquote>Some quote</blockquote>');
expect(i18n.md('> Some quote'))
.toBe('<blockquote>Some quote</blockquote>');
expect(i18n.md('> escape <a href="#foo">bar</a>'))
.toBe('<blockquote>escape &lt;a href=&quot;#foo&quot;&gt;bar&lt;/a&gt;</blockquote>');
});

test('md - code', () => {
expect(i18n.md('> Some `code example` yeah'))
.toBe('<blockquote>Some <code>code example</code> yeah</blockquote>');
expect(i18n.md('Some `code with **[foo](#bar)**`'))
.toBe('Some <code>code with **[foo](#bar)**</code>');
expect(i18n.md('single `a` char'))
.toBe('single <code>a</code> char');
expect(i18n.md('is this \\`not code`, or..?'))
.toBe('is this `not code`, or..?');
expect(i18n.md('is this `not code`, or..?'))
.toBe('is this <code>not code</code>, or..?');
expect(i18n.md('not a `https://link.com`'))
.toBe('not a <code>https://link.com</code>');
expect(i18n.md('a regexp: `TShop\.Setup\(\s*([{](?>[^\\"{}]+|"(?>[^\\"]+|\\[\S\s])*"|\\[\S\s]|(?-1))*[}])`'))
.toBe('a regexp: <code>TShop\.Setup\(\s*([{](?&gt;[^\\&quot;{}]+|&quot;(?&gt;[^\\&quot;]+|\\[\S\s])*&quot;|\\[\S\s]|(?-1))*[}])</code>');
expect(i18n.md('kikuchi` changed nick to kikuchi```.'))
.toBe('kikuchi` changed nick to kikuchi```.');
});

test('md - em, strong', () => {
expect(i18n.md('> Some *em text* right'))
.toBe('<blockquote>Some <em>em text</em> right</blockquote>');
expect(i18n.md('Some **strong text** right'))
.toBe('Some <strong>strong text</strong> right');
expect(i18n.md('Some ***strong em text*** right'))
.toBe('Some <em><strong>strong em text</strong></em> right');
expect(i18n.md('> Some * em text* right'))
.toBe('<blockquote>Some * em text* right</blockquote>');

// Quotes should always be escaped - Protect against XSS
expect(i18n.md('Hey *foo* \'"**bar**"\' ***baz***!'))
.toBe('Hey <em>foo</em> &apos;&quot;<strong>bar</strong>&quot;&apos; <em><strong>baz</strong></em>!');
});

test('md - colors', () => {
expect(i18n.md('\x02bold text\x02')).toBe('<strong>bold text</strong>');
expect(i18n.md('\x1ditalic text\x1d')).toBe('<em>italic text</em>');
expect(i18n.md('\u00035colored text\x03')).toBe('<span style="color:brown">colored text</span>');
expect(i18n.md('\u00034,12colored text and background\u0003')).toBe('<span style="color:red;background-color:lightblue">colored text and background</span>');
});

test('md - emojis', () => {
Expand All @@ -44,47 +99,31 @@ test('md - emojis', () => {
.toMatch(/but <img.*[^>]+>. turns into an emoji/);
});

test('md - em, strong', () => {
expect(i18n.md('Hey *foo* "**bar**" ***baz***!'))
.toBe('Hey <em>foo</em> "<strong>bar</strong>" <em><strong>baz</strong></em>!');
});

test('md - markdown link', () => {
expect(i18n.md('some [cool chat](https://convos.chat)'))
.toBe('some <a href="https://convos.chat" target="_blank">cool chat</a>');
test('md - url', () => {
expect(i18n.md('A link to https://convos.chat, cool ey?'))
.toBe('A link to <a href="https://convos.chat" target="_blank">https://convos.chat</a>, cool ey?');
.toBe('A link to <a href="https://convos.chat" target="_blank">convos.chat</a>, cool ey?');
expect(i18n.md('A link to http://convos.chat, cool ey?'))
.toBe('A link to <a href="http://convos.chat" target="_blank">http://convos.chat</a>, cool ey?');
expect(i18n.md('A link to mailto:jhthorsen@cpan.org!'))
.toBe('A link to <a href="mailto:jhthorsen@cpan.org" target="_blank">jhthorsen@cpan.org</a>!');
expect(i18n.md('https://ru.wikipedia.org/wiki/Участница:Gryllida/Черновик last symbol shows as separate outside of the URL? do you reproduce the bug?'))
.toBe('<a href=\"https://ru.wikipedia.org/wiki/Участница:Gryllida/Черновик\" target=\"_blank\">https://ru.wikipedia.org/wiki/Участница:Gryllida/Черновик</a> last symbol shows as separate outside of the URL? do you reproduce the bug?');
.toBe('<a href=\"https://ru.wikipedia.org/wiki/Участница:Gryllida/Черновик\" target=\"_blank\">ru.wikipedia.org/wiki/Участница:Gryllida/Черновик</a> last symbol shows as separate outside of the URL? do you reproduce the bug?');
expect(i18n.md('[mojo] marcusramberg opened pull request #1894: Minor tweaks to Growing guide. - https://git.io/JD9ph'))
.toBe('[mojo] marcusramberg opened pull request #1894: Minor tweaks to Growing guide. - <a href=\"https://git.io/JD9ph\" target=\"_blank\">https://git.io/JD9ph</a>');
});
.toBe('[mojo] marcusramberg opened pull request #1894: Minor tweaks to Growing guide. - <a href=\"https://git.io/JD9ph\" target=\"_blank\">git.io/JD9ph</a>');

test('md - code', () => {
expect(i18n.md('single `a` char'))
.toBe('single <code>a</code> char');
expect(i18n.md('is this \\`not code`, or..?'))
.toBe('is this `not code`, or..?');
expect(i18n.md('is this `not code`, or..?'))
.toBe('is this <code>not code</code>, or..?');
expect(i18n.md('not a `https://link.com`'))
.toBe('not a <code>https://link.com</code>');
expect(i18n.md('a regexp: `TShop\.Setup\(\s*([{](?>[^\\"{}]+|"(?>[^\\"]+|\\[\S\s])*"|\\[\S\s]|(?-1))*[}])`'))
.toBe('a regexp: <code>TShop\.Setup\(\s*([{](?&gt;[^\\"{}]+|"(?&gt;[^\\"]+|\\[\S\s])*"|\\[\S\s]|(?-1))*[}])</code>');
expect(i18n.md('kikuchi` changed nick to kikuchi```.'))
.toBe('kikuchi` changed nick to kikuchi```.');
// Protect against XSS
expect(i18n.md('https://x."//onfocus="alert(document.domain)"//autofocus="" b="'))
.toBe('<a href="https://x.&quot;//onfocus=&quot;alert(document.domain)&quot;//autofocus=&quot;" target="_blank">x.&quot;//onfocus=&quot;alert(document.domain)&quot;//autofocus=&quot;</a>&quot; b=&quot;');
});

test('md - nbsp', () => {
expect(i18n.md('')).toBe('&nbsp;');
expect(i18n.md(' ___ ___ _ ___ _____ ___')).toBe('&nbsp; &nbsp; ___ ___ &nbsp;_ &nbsp;___ &nbsp; _____ &nbsp;___');
test('md - markdown link', () => {
expect(i18n.md('some [cool chat](https://convos.chat)'))
.toBe('some <a href="https://convos.chat" target="_blank">cool chat</a>');
});

test('md - channel names', () => {
expect(i18n.md('want to join #foo-1.2 #foo-bar#not href="#anchor" #foo.bar'))
.toBe('want to join <a href=\"./%23foo-1.2\">#foo-1.2</a> <a href=\"./%23foo-bar\">#foo-bar</a>#not href="#anchor" <a href=\"./%23foo.bar\">#foo.bar</a>');
expect(i18n.md('want to join #foo-1.2 #foo-bar href="#anchor" #foo.bar'))
.toBe('want to join <a href="./%23foo-1.2">#foo-1.2</a> <a href="./%23foo-bar">#foo-bar</a> href=&quot;#anchor&quot; <a href="./%23foo.bar">#foo.bar</a>');
});

function countEmojis(str) {
Expand Down
198 changes: 134 additions & 64 deletions assets/store/I18N.js
@@ -1,13 +1,31 @@
import Emojis from '../js/Emojis';
import XRegExp from 'xregexp';
import Reactive from '../js/Reactive';
import {api} from '../js/Api';
import {derived} from 'svelte/store';
import {route} from '../store/Route';

const RE = {};
const STOP = ' ,.:;!"\'';
const XML_ESCAPE = {'&': '&amp;', '<': '&lt;', '>': '&gt;', "'": '&apos;', '"': '&quot;'};
// Characters with a special meaning in HTML, mapped to the entity that
// replaces them.
const ESCAPE = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  "'": '&apos;',
  '"': '&quot;',
};

/**
 * Replace every match of `re` in `str` with its entry in ESCAPE.
 *
 * @param {String} str Text to escape.
 * @param {RegExp} re Pattern selecting the characters to replace.
 * @return {String} The escaped text.
 */
const escape = (str, re = /[&<>'"]/g) => {
  const toEntity = (char) => ESCAPE[char];
  return str.replace(re, toEntity);
};
/**
 * Keep significant whitespace visible in HTML: one trailing and one leading
 * whitespace character become "&nbsp;", and every run of two whitespace
 * characters becomes " &nbsp;".
 *
 * @param {String} str Text that is about to be rendered as HTML.
 * @return {String} The text with non-breaking spaces inserted.
 */
const nbsp = (str) => {
  const trailingFixed = str.replace(/\s$/, '&nbsp;');
  const edgesFixed = trailingFixed.replace(/^\s/, '&nbsp;');
  return edgesFixed.replace(/\s{2}/g, ' &nbsp;');
};
/**
 * Build the opening and closing markup for a list of nested tag names.
 *
 * tagPair(['em', 'strong']) returns ['<em><strong>', '</strong></em>'].
 * The closing tags are produced from a reversed copy, so the array passed in
 * is left untouched.
 *
 * @param {Array} tags Tag names, outermost first.
 * @return {Array} A two element array: [startTags, endTags].
 */
const tagPair = (tags) => [
  tags.map((name) => `<${name}>`).join(''),
  [...tags].reverse().map((name) => `</${name}>`).join(''),
];

// Color names used for the "color" and "background-color" styles of IRC
// colored text, keyed by the numeric color code. The position in the list is
// the color code.
const COLORS = [
  'white', // 0
  'black', // 1
  'blue', // 2
  'green', // 3
  'red', // 4
  'brown', // 5
  'magenta', // 6
  'orange', // 7
  'yellow', // 8
  'lightgreen', // 9
  'cyan', // 10
  'lightcyan', // 11
  'lightblue', // 12
  'pink', // 13
  'grey', // 14
  'lightgrey', // 15
].reduce((byCode, name, code) => {
  byCode[code] = name;
  return byCode;
}, {});

export default class I18N extends Reactive {
constructor() {
Expand All @@ -21,6 +39,7 @@ export default class I18N extends Reactive {

this._languages = [];
this._languageOptions = [];
this._rules = this._makeRules();
}

/**
Expand Down Expand Up @@ -105,85 +124,136 @@ export default class I18N extends Reactive {
* @return {String} A string that might contain HTML tags.
*/
md(str, opt = {}) {
this._state = {};
str = this._xmlEscape(str);
str = this._nbsp(str);
if (!opt.raw) str = this._mdLink(str);
if (!opt.raw) str = this._plainUrlToLink(str);
if (!opt.raw) str = this._extendedFormatting(str);
if (!opt.raw) str = this._mdCode(str);
if (!opt.raw) str = this._mdEmStrong(str);
if (!opt.raw) str = this.emojis.markup(str);
if (!opt.raw) str = this._mdBlockQuote(str);
if (!opt.raw) str = this._mdChannelsAndNicks(str);
return str;
return !str.length ? '&nbsp;'
: opt.raw ? nbsp(escape(str))
: this.emojis.markup(nbsp(this._tagToHTML(this._makeTag(str))));
}

// https://modern.ircdocs.horse/formatting.html
_extendedFormatting(str) {
const zeroTo99 = '0[0-9]|[1-9][0-9]';
const colorRe = new RegExp('\x03(' + zeroTo99 + ')(?:,(' + zeroTo99 + '))?([^\x03]*)', 'g');
_makeRules() {
const rules = [];

return str.replace(colorRe, (all, fg, bg, text) => text).replace(/[\x02\x03\x1d\x1f\x1e\x11\x16\x0f]/g, '');
}
rules.push({tag: tagPair(['code']), re: /`(?=[^`\s])/, rules: [], handler: '_mdTag'});
rules.push({tag: tagPair(['em', 'strong']), re: /\*\*\*(?=\S)/, rules, handler: '_mdTag'});
rules.push({tag: tagPair(['strong']), re: /\*\*(?=\S)/, rules, handler: '_mdTag'});
rules.push({tag: tagPair(['em']), re: /\*(?=\S)/, rules, handler: '_mdTag'});
rules.push({tag: tagPair(['span']), re: /\x03\d{1,2}(?:,\d{1,2})?/, rules, handler: '_mdIrcColorFormatting'});
rules.push({tag: tagPair(['span']), re: /[\x02\x1d\x1e\x1f\x11]/, rules, handler: '_mdIrcTextFormatting'});
rules.push({tag: tagPair(['a']), re: /\[([a-zA-Z][^\]]+)\]\(([^)]+)\)/, rules: [], handler: '_mdLink'});
rules.push({tag: tagPair(['a']), re: /\b(https?|mailto):\S+/, rules: [], handler: '_mdURL'});
rules.push({tag: tagPair(['a']), re: /(?<=\s|^)#[a-zA-Z][\w.-]+(?=\W|$)/, rules: [], handler: '_mdChannelname'});

_mdBlockQuote(str) {
return str.replace(/^&gt;\s(.*)/, (all, quote) => '<blockquote>' + quote + '</blockquote>');
return rules;
}

_mdChannelsAndNicks(str) {
// TODO: Make nicks clickable
return str.replace(/(^|\s)(#[a-zA-Z][\w.-]+)/g, (all, pre, channel) => {
const suffix = channel.match(/\.$/) ? '.' : '';
if (suffix) channel = channel.replace(/\.$/, '');
return pre + '<a href="./' + route.urlFor(encodeURIComponent(channel)) + '">' + channel + '</a>' + suffix;
});
_makeTag(str, rules = this._rules, depth = 0) {
// blockquote
if (depth == 0 && str.indexOf('> ') == 0) {
return [tagPair(['blockquote']), {}, [this._makeTag(str.replace(/^>\s/, ''), rules, depth + 1)]];
}

const children = [];
for (const rule of rules) {
const match = str.match(rule.re);
if (!match) continue;

const tag = {
attrs: {},
after: str.substring(match.index + match[0].length),
before: str.substring(0, match.index),
captured: match[0],
index: match.index,
match,
tag: rule.tag,
};

this[rule.handler](tag);
if (typeof tag.content !== 'string') {
str = tag.before + tag.captured + tag.after;
continue;
}

if (tag.before.length) children.push(this._makeTag(tag.before, rules, depth + 1));
children.push([tag.tag, tag.attrs, [this._makeTag(tag.content, rule.rules, depth + 1)]]);
if (tag.after.length) children.push(this._makeTag(tag.after, rules, depth + 1));
break;
}

return [null, {}, children.length ? children : [escape(str)]];
}

_mdCode(str) {
return str.replace(/(\\?)`([^` ][^`]*)`/g, (all, esc, text) => {
return esc ? all.replace(/^\\/, '') : '<code>' + text + '</code>';
});
_mdChannelname(tag) {
tag.content = tag.captured;
tag.attrs.href = './' + encodeURIComponent(tag.captured);
}

_mdEmStrong(str) {
return str.replace(/(^|\s|")(\\?)(\*+)(\w[^<]*?)\3/g, (all, b, esc, md, text) => {
if (md.length == 1) return esc ? all.replace(/^\\/, '') : b + '<em>' + text + '</em>';
if (md.length == 2) return esc ? all.replace(/^\\/, '') : b + '<strong>' + text + '</strong>';
if (md.length == 3) return esc ? all.replace(/^\\/, '') : b + '<em><strong>' + text + '</strong></em>';
return all;
});
// https://modern.ircdocs.horse/formatting.html
_mdIrcColorFormatting(tag) {
const end = tag.after.indexOf('\x03');
if (end == -1) return;
tag.content = tag.after.substring(0, end);
tag.after = tag.after.substring(end + tag.captured.length);

const style = [];
const color = tag.captured.replace(/\x030?(\d{1,2}).*/, '$1');
if (COLORS[color]) style.push('color:' + COLORS[color]);
const background = tag.captured.replace(/.*,(\d{1,2}).*/, '$1');
if (COLORS[background]) style.push('background-color:' + COLORS[background]);
if (style.length) tag.attrs.style = style.join(';');
}

_mdLink(str) {
const re = RE.mdLink || (RE.mdLink = XRegExp('\\[ ([a-zA-Z][^\\]]+) \\] \\( ([^)]+) \\)', 'gx'));
return XRegExp.replace(str, re, (all, text, href) => {
const scheme = href.match(/^\s*(\w+):/) || ['', ''];
if (scheme[1] && ['http', 'https', 'mailto'].indexOf(scheme[1]) == -1) return all; // Avoid XSS links
this._state.md = true;
const first = href.substring(0, 1);
const target = ['/', '#'].indexOf(first) != -1 ? '' : ' target="_blank"';
return '<a href="' + route.urlFor(href) + '"' + target + '>' + text + '</a>';
});
// https://modern.ircdocs.horse/formatting.html
_mdIrcTextFormatting(tag) {
const end = tag.after.indexOf(tag.captured);
if (end == -1) return;

tag.content = tag.after.substring(0, end);
tag.after = tag.after.substring(end + tag.captured.length);
tag.tag = tag.captured == '\x02' ? tagPair(['strong'])
: tag.captured == '\x1d' ? tagPair(['em'])
: tag.captured == '\x1f' ? tagPair(['u'])
: tag.captured == '\x11' ? tagPair(['code'])
: tagPair(['span']);
}

_nbsp(str) {
return !str.length ? '&nbsp;' : str.replace(/^\s/, '&nbsp;').replace(/\s{2}/g, ' &nbsp;');
_mdLink(tag) {
tag.content = tag.match[1];
tag.attrs.href = escape(tag.match[2]);
if (tag.match[2].match(/^\w+:/)) tag.attrs.target = '_blank';
}

_plainUrlToLink(str) {
if (this._state.md) return str;
_mdTag(tag) {
// Check if the matched character was escaped
if (tag.before.match(/\\$/)) {
tag.before = tag.before.replace(/\\$/, '');
return;
}

const urlRe = RE.url || (RE.url = XRegExp(`(^|\\s) ( (?:http|https)://\\S+ | mailto:\\S+ )`, 'gx'));
const endRe = RE.urlEnd || (RE.urlEnd = XRegExp('^(.*)([' + STOP + '])$'));
return XRegExp.replace(str, urlRe, (all, b, url) => {
const parts = XRegExp.exec(url, endRe) || [all, url, ''];
return b + '<a href="' + parts[1] + '" target="_blank">' + parts[1].replace(/^mailto:/, '') + '</a>' + parts[2];
const end = tag.after.indexOf(tag.captured);
if (end == -1) return;
tag.content = tag.after.substring(0, end);
tag.after = tag.after.substring(end + tag.captured.length);
}

_mdURL(tag) {
tag.captured = tag.captured.replace(/[,.:;!"\']$/, (after) => {
tag.after = after[0] + tag.after;
return '';
});

tag.content = tag.captured.replace(/^(https|mailto):(\/\/)?/, '');
tag.attrs.href = escape(tag.captured);
tag.attrs.target = '_blank';
}

_xmlEscape(str) {
return str.replace(/[&<>']/g, (m) => XML_ESCAPE[m]);
_tagToHTML(tag) {
if (typeof tag === 'string') return tag;

const inner = typeof tag[2] === 'string' ? escape(tag[2]) : tag[2].map(n => this._tagToHTML(n)).join('');
if (!tag[0]) return inner;

const attrs = Object.keys(tag[1]).sort().map(k => `${k}="${tag[1][k]}"`).join(' ');
const startTag = !attrs ? tag[0][0] : tag[0][0].replace(/>/, ' ' + attrs + '>');
return startTag + inner + tag[0][1];
}
}

Expand Down

0 comments on commit 86b2193

Please sign in to comment.