Skip to content

Commit

Permalink
Merge pull request #514 from Crozzers/fix-markdown-in-html-in-list
Browse files Browse the repository at this point in the history
Fix two list item related bugs
  • Loading branch information
nicholasserra committed Jun 22, 2023
2 parents cef18e1 + da54c21 commit 9f6b529
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 20 deletions.
81 changes: 62 additions & 19 deletions lib/markdown2.py
Expand Up @@ -363,6 +363,9 @@ def convert(self, text):
# Turn block-level HTML blocks into hash entries
text = self._hash_html_blocks(text, raw=True)

if 'markdown-in-html' in self.extras:
text = self._do_markdown_in_html(text)

if "fenced-code-blocks" in self.extras and self.safe_mode:
text = self._do_fenced_code_blocks(text)

Expand Down Expand Up @@ -878,27 +881,39 @@ def _hash_html_blocks(self, text, raw=False):

return text

def _strict_tag_block_sub(self, text, html_tags_re, callback):
def _strict_tag_block_sub(self, text, html_tags_re, callback, allow_indent=False):
'''
Finds and substitutes HTML blocks within blocks of text
Args:
text: the text to search
html_tags_re: a regex pattern of HTML block tags to match against.
For example, `Markdown._block_tags_a`
callback: callback function that receives the found HTML text block
allow_indent: allow matching HTML blocks that are not completely outdented
'''
tag_count = 0
current_tag = html_tags_re
block = ''
result = ''

for chunk in text.splitlines(True):
is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
is_markup = re.match(
r'^(\s{0,%s})(?:</code>(?=</pre>))?(</?(%s)\b>?)' % ('' if allow_indent else '0', current_tag), chunk
)
block += chunk

if is_markup:
if chunk.startswith('</'):
if chunk.startswith('%s</' % is_markup.group(1)):
tag_count -= 1
else:
# if close tag is in same line
if self._tag_is_closed(is_markup.group(2), chunk):
if self._tag_is_closed(is_markup.group(3), chunk):
# we must ignore these
is_markup = None
else:
tag_count += 1
current_tag = is_markup.group(2)
current_tag = is_markup.group(3)

if tag_count == 0:
if is_markup:
Expand All @@ -915,6 +930,15 @@ def _tag_is_closed(self, tag_name, text):
# super basic check if number of open tags == number of closing tags
return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('</%s>' % tag_name, text))

def _do_markdown_in_html(self, text):
'''
Run the HTML block substitution over the HTML blocks found in `text`,
including blocks that are indented rather than fully outdented.
Args:
text: the text to search for HTML blocks
Returns:
The result of `_strict_tag_block_sub`, which substitutes each found
HTML block with the value returned by the callback below
'''
def callback(block):
# remove the block's common leading indent, remembering it for later
indent, block = self._uniform_outdent(block)
# NOTE(review): `_hash_html_block_sub` is defined outside this view;
# presumably it replaces the HTML block with a hash entry - confirm
block = self._hash_html_block_sub(block)
# put the remembered indent back; whitespace-only lines are kept but not indented
block = self._uniform_indent(block, indent, include_empty_lines=True, indent_empty_lines=False)
return block

# the trailing `True` is `allow_indent`: match HTML blocks that are not completely outdented
return self._strict_tag_block_sub(text, self._block_tags_a, callback, True)

def _strip_link_definitions(self, text):
# Strips link definitions from text, stores the URLs and titles in
# hash references.
Expand Down Expand Up @@ -1893,7 +1917,8 @@ def _list_item_sub(self, match):
item = match.group(4)
leading_line = match.group(1)
if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
item = self._run_block_gamut(self._outdent(item))
item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1]
item = self._run_block_gamut(item)
else:
# Recursion for sub-lists:
item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1])
Expand Down Expand Up @@ -2201,7 +2226,7 @@ def _wavedrom_block_sub(self, match):

return self._uniform_indent(
'\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag),
lead_indent, include_empty_lines=True
lead_indent, indent_empty_lines=True
)

def _do_wavedrom_blocks(self, text):
Expand Down Expand Up @@ -2612,13 +2637,16 @@ def _outdent(self, text):
# Remove one level of line-leading tabs or spaces
return self._outdent_re.sub('', text)

def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):
# Removes the smallest common leading indentation from each (non empty)
# line of `text` and returns said indent along with the outdented text.
# The `min_outdent` kwarg makes sure the smallest common whitespace
# must be at least this size
# The `max_outdent` sets the maximum amount a line can be
# outdented by
@staticmethod
def _uniform_outdent(text, min_outdent=None, max_outdent=None):
'''
Removes the smallest common leading indentation from each (non empty)
line of `text` and returns said indent along with the outdented text.
Args:
min_outdent: make sure the smallest common whitespace is at least this size
max_outdent: the maximum amount a line can be outdented by
'''

# find the leading whitespace for every line
whitespace = [
Expand Down Expand Up @@ -2652,11 +2680,26 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):

return outdent, ''.join(outdented)

def _uniform_indent(self, text, indent, include_empty_lines=False):
return ''.join(
(indent + line if line.strip() or include_empty_lines else '')
for line in text.splitlines(True)
)
@staticmethod
def _uniform_indent(text, indent, include_empty_lines=False, indent_empty_lines=False):
'''
Uniformly indent a block of text by a fixed amount
The text is split into lines with their line endings kept, each line is
handled individually, and the pieces are joined back into one string.
Args:
text: the text to indent
indent: a string containing the indent to apply
include_empty_lines: keep whitespace-only lines in the output unchanged
(not indented) instead of dropping them. Has no effect when
`indent_empty_lines` is set, because that case is checked first
indent_empty_lines: indent whitespace-only lines with the rest of the text
Returns:
The indented text as a single string
'''
blocks = []
for line in text.splitlines(True):
if line.strip() or indent_empty_lines:
# line has content, or whitespace-only lines are to be indented as well
blocks.append(indent + line)
elif include_empty_lines:
# whitespace-only line: keep it exactly as it was, without the indent
blocks.append(line)
else:
# whitespace-only line: drop it, including its line ending
blocks.append('')
return ''.join(blocks)

@staticmethod
def _match_overlaps_substr(text, match, substr):
Expand Down
37 changes: 37 additions & 0 deletions test/tm-cases/markdown_in_html_in_lists.html
@@ -0,0 +1,37 @@
<ul>
<li><p>Item 1</p>

<div>

<h6>Block one</h6>

<p>Some text</p>

</div></li>
<li><p>Item 2</p>

<ul>
<li><p>Item 3</p>

<ul>
<li><p>Item 4</p>

<div>

<h6>Block two</h6>

<p>Some text</p>

</div></li>
</ul></li>
<li><p>Item 5</p>

<div>

<h6>Block three</h6>

<p>Some text</p>

</div></li>
</ul></li>
</ul>
1 change: 1 addition & 0 deletions test/tm-cases/markdown_in_html_in_lists.opts
@@ -0,0 +1 @@
{"extras": ["markdown-in-html"]}
17 changes: 17 additions & 0 deletions test/tm-cases/markdown_in_html_in_lists.text
@@ -0,0 +1,17 @@
- Item 1
<div markdown="1">
###### Block one
Some text
</div>
- Item 2
- Item 3
- Item 4
<div markdown="1">
###### Block two
Some text
</div>
- Item 5
<div markdown="1">
###### Block three
Some text
</div>
15 changes: 15 additions & 0 deletions test/tm-cases/nested_list.html
Expand Up @@ -34,3 +34,18 @@
</ul></li>
<li>Item 3 - yes! just a single item</li>
</ul>

<p>Other more different nested list:</p>

<ul>
<li><p>Item 1
With some space after</p></li>
<li><p>Item 2</p>

<ul>
<li>Item 3
<ul>
<li>Item 4</li>
</ul></li>
</ul></li>
</ul>
12 changes: 11 additions & 1 deletion test/tm-cases/nested_list.text
Expand Up @@ -20,4 +20,14 @@ Slightly more nested list:
+ What
+ The
+ Code
* Item 3 - yes! just a single item
* Item 3 - yes! just a single item


Other more different nested list:

- Item 1
With some space after

- Item 2
- Item 3
- Item 4
12 changes: 12 additions & 0 deletions test/tm-cases/seperated_list_items.html
@@ -0,0 +1,12 @@
<ul>
<li><p>Item 1
ABCDEF</p></li>
<li><p>Item 2</p>

<ul>
<li>Item 3
<ul>
<li>Item 4</li>
</ul></li>
</ul></li>
</ul>
6 changes: 6 additions & 0 deletions test/tm-cases/seperated_list_items.text
@@ -0,0 +1,6 @@
- Item 1
ABCDEF

- Item 2
- Item 3
- Item 4

0 comments on commit 9f6b529

Please sign in to comment.