diff --git a/libs/markdown/README.md b/libs/markdown/README.md
index 35aa7dc1..56108c52 100644
--- a/libs/markdown/README.md
+++ b/libs/markdown/README.md
@@ -2,11 +2,14 @@
 
 This module is based on the [markdown-it](https://github.com/markdown-it/markdown-it) library by Vitaly Puzrin and Alex Kocharin.
 
-It has been extended with support for the following by default:
+It has been extended with support for the following by default in the standard (non-commonmark) mode:
 
 - anchors
-- emoji
-- ins
+- emojis
+- insert
+- subscripts
+- superscripts
+- mark
 
 ## Licenses
 
diff --git a/libs/markdown/common/utils.b b/libs/markdown/common/utils.b
index b11a91a9..221c5080 100644
--- a/libs/markdown/common/utils.b
+++ b/libs/markdown/common/utils.b
@@ -39,6 +39,8 @@ def is_valid_entity_code(c) {
 var UNESCAPE_MD_RE = '\\\\([\\\\!"#$%&\'()*+,.\\/:;<=>?@[\\]^_`{|}~-])'
 var ENTITY_RE = '&([a-z#][a-z0-9]{1,31});'
 var UNESCAPE_ALL_RE = '/' + UNESCAPE_MD_RE + '|' + ENTITY_RE + '/si'
+var UNESCAPE_RE = '/\\\\([ \\\\!"#$%&\'()*+,.\\/:;<=>?@[\\]^_`{|}~-])/'
+var UNESCAPE_SPACE_RE = '/(^|[^\\\\])(\\\\\\\\)*\\s/'
 
 var DIGITAL_ENTITY_TEST_RE = '/^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$/i'
 
diff --git a/libs/markdown/inline/index.b b/libs/markdown/inline/index.b
index 06cac304..3ff28f37 100644
--- a/libs/markdown/inline/index.b
+++ b/libs/markdown/inline/index.b
@@ -13,7 +13,10 @@ import .linkify
 import .newline
 import .strikethrough
 import .text
-import .ins
+import .insert
+import .subscript
+import .superscript
+import .mark
 
 # classes
 import .inline_state
diff --git a/libs/markdown/inline/ins.b b/libs/markdown/inline/insert.b
similarity index 100%
rename from libs/markdown/inline/ins.b
rename to libs/markdown/inline/insert.b
diff --git a/libs/markdown/inline/mark.b b/libs/markdown/inline/mark.b
new file mode 100644
index 00000000..65b8ee2c
--- /dev/null
+++ b/libs/markdown/inline/mark.b
@@ -0,0 +1,127 @@
+# Insert each marker as a separate text token, and add it to delimiter list
+#
+def tokenize(state, silent) {
+  var i, scanned, token, len, ch,
+      start = state.pos,
+      marker = state.src[start]
+
+  if silent return false
+
+  if marker != '=' return false
+
+  scanned = state.scan_delims(state.pos, true)
+  len = scanned.length
+  ch = marker
+
+  if len < 2 return false
+
+  if len % 2 > 0 {
+    token = state.push('text', '', 0)
+    token.content = ch
+    len--
+  }
+
+  iter i = 0; i < len; i += 2 {
+    token = state.push('text', '', 0)
+    token.content = ch + ch
+
+    if !scanned.can_open and !scanned.can_close continue
+
+    state.delimiters.append({
+      marker: marker,
+      length: 0, # disable "rule of 3" length checks meant for emphasis
+      jump: i / 2, # 1 delimiter = 2 characters
+      token: state.tokens.length() - 1,
+      end: -1,
+      open: scanned.can_open,
+      close: scanned.can_close
+    })
+  }
+
+  state.pos += scanned.length
+
+  return true
+}
+
+
+# Walk through delimiter list and replace text tokens with tags
+#
+def _post_process(state, delimiters) {
+  var i, j,
+      start_delim,
+      end_delim,
+      token,
+      lone_markers = [],
+      max = delimiters.length()
+
+  iter i = 0; i < max; i++ {
+    start_delim = delimiters[i]
+
+    if start_delim.marker != '=' {
+      continue
+    }
+
+    if start_delim.end == -1 {
+      continue
+    }
+
+    end_delim = delimiters[start_delim.end]
+
+    token = state.tokens[start_delim.token]
+    token.type = 'mark_open'
+    token.tag = 'mark'
+    token.nesting = 1
+    token.markup = '=='
+    token.content = ''
+
+    token = state.tokens[end_delim.token]
+    token.type = 'mark_close'
+    token.tag = 'mark'
+    token.nesting = -1
+    token.markup = '=='
+    token.content = ''
+
+    if (state.tokens[end_delim.token - 1].type == 'text' and
+        state.tokens[end_delim.token - 1].content == '=') {
+
+      lone_markers.append(end_delim.token - 1)
+    }
+  }
+
+  # If a marker sequence has an odd number of characters, it's split
+  # like this: `=====` -> `=` + `==` + `==`, leaving one marker at the
+  # start of the sequence.
+  #
+  # So, we have to move all those markers after subsequent mark_close tags.
+  #
+  while lone_markers.length() > 0 {
+    i = lone_markers.pop()
+    j = i + 1
+
+    while j < state.tokens.length() and state.tokens[j].type == 'mark_close' {
+      j++
+    }
+
+    j--
+
+    if i != j {
+      token = state.tokens[j]
+      state.tokens[j] = state.tokens[i]
+      state.tokens[i] = token
+    }
+  }
+}
+
+def post_process(state) {
+  var curr,
+      tokens_meta = state.tokens_meta,
+      max = (state.tokens_meta or []).length()
+
+  _post_process(state, state.delimiters)
+
+  iter curr = 0; curr < max; curr++ {
+    if tokens_meta[curr] and tokens_meta[curr].delimiters {
+      _post_process(state, tokens_meta[curr].delimiters)
+    }
+  }
+}
diff --git a/libs/markdown/inline/subscript.b b/libs/markdown/inline/subscript.b
new file mode 100644
index 00000000..8a8866a0
--- /dev/null
+++ b/libs/markdown/inline/subscript.b
@@ -0,0 +1,57 @@
+import ..common.utils { UNESCAPE_RE, UNESCAPE_SPACE_RE }
+
+
+def subscript(state, silent) {
+  var found,
+      content,
+      token,
+      max = state.pos_max,
+      start = state.pos
+
+  if state.src[start] != '~' return false
+  if silent return false # don't run any pairs in validation mode
+  if start + 2 >= max return false
+
+  state.pos = start + 1
+
+  while state.pos < max {
+    if state.src[state.pos] == '~' {
+      found = true
+      break
+    }
+
+    state.md.inline.skip_token(state)
+  }
+
+  if !found or start + 1 == state.pos {
+    state.pos = start
+    return false
+  }
+
+  content = state.src[start + 1, state.pos]
+
+  # don't allow unescaped spaces/newlines inside
+  if content.match(UNESCAPE_SPACE_RE) {
+    state.pos = start
+    return false
+  }
+
+  # found!
+  state.pos_max = state.pos
+  state.pos = start + 1
+
+  # Earlier we checked !silent, but this implementation does not need it
+  token = state.push('sub_open', 'sub', 1)
+  token.markup = '~'
+
+  token = state.push('text', '', 0)
+  token.content = content.replace(UNESCAPE_RE, '$1')
+
+  token = state.push('sub_close', 'sub', -1)
+  token.markup = '~'
+
+  state.pos = state.pos_max + 1
+  state.pos_max = max
+  return true
+}
+
diff --git a/libs/markdown/inline/superscript.b b/libs/markdown/inline/superscript.b
new file mode 100644
index 00000000..4939c12e
--- /dev/null
+++ b/libs/markdown/inline/superscript.b
@@ -0,0 +1,56 @@
+import ..common.utils { UNESCAPE_RE, UNESCAPE_SPACE_RE }
+
+def superscript(state, silent) {
+  var found,
+      content,
+      token,
+      max = state.pos_max,
+      start = state.pos
+
+  if state.src[start] != '^' return false
+  if silent return false # don't run any pairs in validation mode
+  if start + 2 >= max return false
+
+  state.pos = start + 1
+
+  while state.pos < max {
+    if state.src[state.pos] == '^' {
+      found = true
+      break
+    }
+
+    state.md.inline.skip_token(state)
+  }
+
+  if !found or start + 1 == state.pos {
+    state.pos = start
+    return false
+  }
+
+  content = state.src[start + 1, state.pos]
+
+  # don't allow unescaped spaces/newlines inside
+  if content.match(UNESCAPE_SPACE_RE) {
+    state.pos = start
+    return false
+  }
+
+  # found!
+  state.pos_max = state.pos
+  state.pos = start + 1
+
+  # Earlier we checked !silent, but this implementation does not need it
+  token = state.push('sup_open', 'sup', 1)
+  token.markup = '^'
+
+  token = state.push('text', '', 0)
+  token.content = content.replace(UNESCAPE_RE, '$1')
+
+  token = state.push('sup_close', 'sup', -1)
+  token.markup = '^'
+
+  state.pos = state.pos_max + 1
+  state.pos_max = max
+  return true
+}
+
diff --git a/libs/markdown/parser_inline.b b/libs/markdown/parser_inline.b
index dfc2b6e0..46b1d55e 100644
--- a/libs/markdown/parser_inline.b
+++ b/libs/markdown/parser_inline.b
@@ -10,8 +10,11 @@ var _rules = [
   [ 'escape', inline.escape ],
   [ 'backticks', inline.backticks ],
   [ 'strikethrough', inline.strikethrough.tokenize ],
-  [ 'ins', inline.ins.tokenize ],
+  [ 'insert', inline.insert.tokenize ],
+  [ 'mark', inline.mark.tokenize ],
   [ 'emphasis', inline.emphasis.tokenize ],
+  [ 'subscript', inline.subscript ],
+  [ 'superscript', inline.superscript ],
   [ 'link', inline.link ],
   [ 'image', inline.image ],
   [ 'autolink', inline.autolink ],
@@ -26,7 +29,8 @@ var _rules2 = [
   [ 'balance_pairs', inline.balance_pairs ],
   [ 'strikethrough', inline.strikethrough.post_process ],
-  [ 'ins', inline.ins.post_process ],
+  [ 'insert', inline.insert.post_process ],
+  [ 'mark', inline.mark.post_process ],
   [ 'emphasis', inline.emphasis.post_process ],
   # rules for pairs separate '**' into its own text tokens, which may be left unused,
   # rule below merges unused segments back with the rest of the text