expanded built-in markdown module with more standard features
mcfriend99 committed Jul 9, 2023
1 parent: 8cc6912 · commit: 7bff6e0
Showing 8 changed files with 258 additions and 6 deletions.
9 changes: 6 additions & 3 deletions libs/markdown/README.md
@@ -2,11 +2,14 @@

 This module is based on the [markdown-it](https://github.com/markdown-it/markdown-it) library by Vitaly Puzrin and Alex Kocharin.
 
-It has been extended with support for the following by default:
+It has been extended with support for the following by default in the standard (non-commonmark) mode:
 
 - anchors
-- emoji
-- ins
+- emojies
+- insert
+- subscripts
+- superscripts
+- mark
 
 
 ## Licenses
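For a quick sense of the extended syntax, a hedged sketch of the markdown each new rule accepts and the HTML it should produce; the `==`, `~`, and `^` markers are confirmed by the code below, while the `++` insert syntax and the exact HTML are assumed from the markdown-it plugins this module mirrors (the renderers are not shown in this diff):

```
==highlighted==   ->  <mark>highlighted</mark>
++inserted++      ->  <ins>inserted</ins>
H~2~O             ->  H<sub>2</sub>O
29^th^            ->  29<sup>th</sup>
```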
2 changes: 2 additions & 0 deletions libs/markdown/common/utils.b
@@ -39,6 +39,8 @@ def is_valid_entity_code(c) {
 var UNESCAPE_MD_RE = '\\\\([\\\\!"#$%&\'()*+,.\\/:;<=>?@[\\]^_`{|}~-])'
 var ENTITY_RE = '&([a-z#][a-z0-9]{1,31});'
 var UNESCAPE_ALL_RE = '/' + UNESCAPE_MD_RE + '|' + ENTITY_RE + '/si'
+var UNESCAPE_RE = '/\\\\([ \\\\!"#$%&\'()*+,.\/:;<=>?@[\]^_`{|}~-])/'
+var UNESCAPE_SPACE_RE = '/(^|[^\\\\])(\\\\\\\\)*\s/'
 
 var DIGITAL_ENTITY_TEST_RE = '/^#((?:x[a-f0-9]{1,8}|[0-9]{1,8}))$/i'

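The two added regexes serve the subscript/superscript rules introduced below: `UNESCAPE_RE` strips the backslash from an escaped punctuation character or space, and `UNESCAPE_SPACE_RE` matches any whitespace preceded by an even number of backslashes, i.e. whitespace that is not itself escaped. A rough illustration, using only the `match`/`replace` calls that appear in this diff (return values assumed):

```blade
# hypothetical probe, not part of the commit
'2\\~3'.replace(UNESCAPE_RE, '$1')   # -> '2~3'  (escape removed)
'a\\ b'.match(UNESCAPE_SPACE_RE)     # no match: the space is escaped
'a b'.match(UNESCAPE_SPACE_RE)       # match: unescaped space
```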
5 changes: 4 additions & 1 deletion libs/markdown/inline/index.b
@@ -13,7 +13,10 @@ import .linkify
 import .newline
 import .strikethrough
 import .text
-import .ins
+import .insert
+import .subscript
+import .superscript
+import .mark
 
 # classes
 import .inline_state
File renamed without changes (from the import changes above, evidently libs/markdown/inline/ins.b → insert.b).
127 changes: 127 additions & 0 deletions libs/markdown/inline/mark.b
@@ -0,0 +1,127 @@
# Insert each marker as a separate text token, and add it to delimiter list
#
def tokenize(state, silent) {
  var i, scanned, token, len, ch,
      start = state.pos,
      marker = state.src[start]

  if silent return false

  if marker != '=' return false

  scanned = state.scan_delims(state.pos, true)
  len = scanned.length
  ch = marker

  if len < 2 return false

  if len % 2 > 0 {
    token = state.push('text', '', 0)
    token.content = ch
    len--
  }

  iter i = 0; i < len; i += 2 {
    token = state.push('text', '', 0)
    token.content = ch + ch

    if !scanned.can_open and !scanned.can_close continue

    state.delimiters.append({
      marker: marker,
      length: 0,   # disable "rule of 3" length checks meant for emphasis
      jump: i / 2, # 1 delimiter = 2 characters
      token: state.tokens.length() - 1,
      end: -1,
      open: scanned.can_open,
      close: scanned.can_close
    })
  }

  state.pos += scanned.length

  return true
}
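To make the loop above concrete, a hedged trace, assuming `scan_delims` reports the full run length plus `can_open`/`can_close` flags as in markdown-it:

```
src "a ==b== c", pos at the first '=':
  scanned.length = 2 -> one text token "==", one delimiter entry

src "===b==", pos 0:
  scanned.length = 3 -> odd, so one literal text token "=",
  then one text token "==" with a delimiter entry
```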


# Walk through delimiter list and replace text tokens with tags
#
def _post_process(state, delimiters) {
  var i, j,
      start_delim,
      end_delim,
      token,
      lone_markers = [],
      max = delimiters.length()

  iter i = 0; i < max; i++ {
    start_delim = delimiters[i]

    if start_delim.marker != '=' {
      continue
    }

    if start_delim.end == -1 {
      continue
    }

    end_delim = delimiters[start_delim.end]

    token = state.tokens[start_delim.token]
    token.type = 'mark_open'
    token.tag = 'mark'
    token.nesting = 1
    token.markup = '=='
    token.content = ''

    token = state.tokens[end_delim.token]
    token.type = 'mark_close'
    token.tag = 'mark'
    token.nesting = -1
    token.markup = '=='
    token.content = ''

    if (state.tokens[end_delim.token - 1].type == 'text' and
        state.tokens[end_delim.token - 1].content == '=') {
      lone_markers.append(end_delim.token - 1)
    }
  }

  # If a marker sequence has an odd number of characters, it's split
  # like this: `=====` -> `=` + `==` + `==`, leaving one marker at the
  # start of the sequence.
  #
  # So, we have to move all those markers after subsequent mark_close tags.
  #
  while lone_markers.length() > 0 {
    i = lone_markers.pop()
    j = i + 1

    while j < state.tokens.length() and state.tokens[j].type == 'mark_close' {
      j++
    }

    j--

    if i != j {
      token = state.tokens[j]
      state.tokens[j] = state.tokens[i]
      state.tokens[i] = token
    }
  }
}

def post_process(state) {
  var curr,
      tokens_meta = state.tokens_meta,
      max = (state.tokens_meta or []).length()

  _post_process(state, state.delimiters)

  iter curr = 0; curr < max; curr++ {
    if tokens_meta[curr] and tokens_meta[curr].delimiters {
      _post_process(state, tokens_meta[curr].delimiters)
    }
  }
}
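Assuming a renderer that mirrors markdown-it-mark (not shown in this diff), the net effect of `tokenize` plus `post_process` is:

```
input:   before ==important== after
tokens:  text("before ") mark_open text("important") mark_close text(" after")
html:    before <mark>important</mark> after
```

The lone-marker shuffle at the end only matters for odd runs such as `===`: the single leftover `=` text token is moved past any adjacent `mark_close` tokens so it renders outside the tag.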
57 changes: 57 additions & 0 deletions libs/markdown/inline/subscript.b
@@ -0,0 +1,57 @@
import ..common.utils { UNESCAPE_RE, UNESCAPE_SPACE_RE }


def subscript(state, silent) {
  var found,
      content,
      token,
      max = state.pos_max,
      start = state.pos

  if state.src[start] != '~' return false
  if silent return false # don't run any pairs in validation mode
  if start + 2 >= max return false

  state.pos = start + 1

  while state.pos < max {
    if state.src[state.pos] == '~' {
      found = true
      break
    }

    state.md.inline.skip_token(state)
  }

  if !found or start + 1 == state.pos {
    state.pos = start
    return false
  }

  content = state.src[start + 1, state.pos]

  # don't allow unescaped spaces/newlines inside
  if content.match(UNESCAPE_SPACE_RE) {
    state.pos = start
    return false
  }

  # found!
  state.pos_max = state.pos
  state.pos = start + 1

  # Earlier we checked !silent, but this implementation does not need it
  token = state.push('sub_open', 'sub', 1)
  token.markup = '~'

  token = state.push('text', '', 0)
  token.content = content.replace(UNESCAPE_RE, '$1')

  token = state.push('sub_close', 'sub', -1)
  token.markup = '~'

  state.pos = state.pos_max + 1
  state.pos_max = max
  return true
}
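A few hedged input/output examples for this rule, with the HTML assumed to mirror markdown-it-sub:

```
H~2~O     ->  H<sub>2</sub>O
~a b~     ->  ~a b~            (unescaped space: the rule bails out)
~a\ b~    ->  <sub>a b</sub>   (escaped space, unescaped via UNESCAPE_RE)
```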

56 changes: 56 additions & 0 deletions libs/markdown/inline/superscript.b
@@ -0,0 +1,56 @@
import ..common.utils { UNESCAPE_RE, UNESCAPE_SPACE_RE }

def superscript(state, silent) {
  var found,
      content,
      token,
      max = state.pos_max,
      start = state.pos

  if state.src[start] != '^' return false
  if silent return false # don't run any pairs in validation mode
  if start + 2 >= max return false

  state.pos = start + 1

  while state.pos < max {
    if state.src[state.pos] == '^' {
      found = true
      break
    }

    state.md.inline.skip_token(state)
  }

  if !found or start + 1 == state.pos {
    state.pos = start
    return false
  }

  content = state.src[start + 1, state.pos]

  # don't allow unescaped spaces/newlines inside
  if content.match(UNESCAPE_SPACE_RE) {
    state.pos = start
    return false
  }

  # found!
  state.pos_max = state.pos
  state.pos = start + 1

  # Earlier we checked !silent, but this implementation does not need it
  token = state.push('sup_open', 'sup', 1)
  token.markup = '^'

  token = state.push('text', '', 0)
  token.content = content.replace(UNESCAPE_RE, '$1')

  token = state.push('sup_close', 'sup', -1)
  token.markup = '^'

  state.pos = state.pos_max + 1
  state.pos_max = max
  return true
}
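This is the same algorithm as `subscript` with `^`/`sup` in place of `~`/`sub`; the two functions differ only in marker and tag, matching the separate upstream markdown-it-sub/markdown-it-sup plugins. Expected behavior (output again assumed):

```
x^2^ + y^2^   ->  x<sup>2</sup> + y<sup>2</sup>
x^a b^        ->  x^a b^          (unescaped space: the rule bails out)
```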

8 changes: 6 additions & 2 deletions libs/markdown/parser_inline.b
@@ -10,8 +10,11 @@ var _rules = [
   [ 'escape', inline.escape ],
   [ 'backticks', inline.backticks ],
   [ 'strikethrough', inline.strikethrough.tokenize ],
-  [ 'ins', inline.ins.tokenize ],
+  [ 'insert', inline.insert.tokenize ],
+  [ 'mark', inline.mark.tokenize ],
   [ 'emphasis', inline.emphasis.tokenize ],
+  [ 'subscript', inline.subscript ],
+  [ 'superscript', inline.superscript ],
   [ 'link', inline.link ],
   [ 'image', inline.image ],
   [ 'autolink', inline.autolink ],
@@ -26,7 +29,8 @@
 var _rules2 = [
   [ 'balance_pairs', inline.balance_pairs ],
   [ 'strikethrough', inline.strikethrough.post_process ],
-  [ 'ins', inline.ins.post_process ],
+  [ 'insert', inline.insert.post_process ],
+  [ 'mark', inline.mark.post_process ],
   [ 'emphasis', inline.emphasis.post_process ],
   # rules for pairs separate '**' into its own text tokens, which may be left unused,
   # rule below merges unused segments back with the rest of the text
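Putting it together, a minimal end-to-end sketch of exercising the new rules; the `markdown` import and `render()` call are assumptions about the module's public API, which this diff does not show:

```blade
import markdown

# assumed API: construct a parser in the default (non-commonmark) mode
var md = markdown()

echo md.render('==mark==, H~2~O and 29^th^')
# expected, if the renderers mirror the markdown-it plugins:
# <p><mark>mark</mark>, H<sub>2</sub>O and 29<sup>th</sup></p>
```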
