Merge pull request #514 from Crozzers/fix-markdown-in-html-in-list

Fix two list item related bugs
trentm · Jun 22, 2023 · 9f6b529
2 parents cef18e1 + da54c21
commit 9f6b529
Show file tree

Hide file tree

Showing 8 changed files with 161 additions and 20 deletions.
diff --git a/lib/markdown2.py b/lib/markdown2.py
@@ -363,6 +363,9 @@ def convert(self, text):
         # Turn block-level HTML blocks into hash entries
         text = self._hash_html_blocks(text, raw=True)
 
+        if 'markdown-in-html' in self.extras:
+            text = self._do_markdown_in_html(text)
+
         if "fenced-code-blocks" in self.extras and self.safe_mode:
             text = self._do_fenced_code_blocks(text)
 
@@ -878,27 +881,39 @@ def _hash_html_blocks(self, text, raw=False):
 
         return text
 
-    def _strict_tag_block_sub(self, text, html_tags_re, callback):
+    def _strict_tag_block_sub(self, text, html_tags_re, callback, allow_indent=False):
+        '''
+        Finds and substitutes HTML blocks within blocks of text
+
+        Args:
+            text: the text to search
+            html_tags_re: a regex pattern of HTML block tags to match against.
+                For example, `Markdown._block_tags_a`
+            callback: callback function that receives the found HTML text block
+            allow_indent: allow matching HTML blocks that are not completely outdented
+        '''
         tag_count = 0
         current_tag = html_tags_re
         block = ''
         result = ''
 
         for chunk in text.splitlines(True):
-            is_markup = re.match(r'^(?:</code>(?=</pre>))?(</?(%s)\b>?)' % current_tag, chunk)
+            is_markup = re.match(
+                r'^(\s{0,%s})(?:</code>(?=</pre>))?(</?(%s)\b>?)' % ('' if allow_indent else '0', current_tag), chunk
+            )
             block += chunk
 
             if is_markup:
-                if chunk.startswith('</'):
+                if chunk.startswith('%s</' % is_markup.group(1)):
                     tag_count -= 1
                 else:
                     # if close tag is in same line
-                    if self._tag_is_closed(is_markup.group(2), chunk):
+                    if self._tag_is_closed(is_markup.group(3), chunk):
                         # we must ignore these
                         is_markup = None
                     else:
                         tag_count += 1
-                        current_tag = is_markup.group(2)
+                        current_tag = is_markup.group(3)
 
             if tag_count == 0:
                 if is_markup:
@@ -915,6 +930,15 @@ def _tag_is_closed(self, tag_name, text):
         # super basic check if number of open tags == number of closing tags
         return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('</%s>' % tag_name, text))
 
+    def _do_markdown_in_html(self, text):
+        def callback(block):
+            indent, block = self._uniform_outdent(block)
+            block = self._hash_html_block_sub(block)
+            block = self._uniform_indent(block, indent, include_empty_lines=True, indent_empty_lines=False)
+            return block
+
+        return self._strict_tag_block_sub(text, self._block_tags_a, callback, True)
+
     def _strip_link_definitions(self, text):
         # Strips link definitions from text, stores the URLs and titles in
         # hash references.
@@ -1893,7 +1917,8 @@ def _list_item_sub(self, match):
         item = match.group(4)
         leading_line = match.group(1)
         if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
-            item = self._run_block_gamut(self._outdent(item))
+            item = self._uniform_outdent(item, min_outdent=' ', max_outdent=self.tab)[1]
+            item = self._run_block_gamut(item)
         else:
             # Recursion for sub-lists:
             item = self._do_lists(self._uniform_outdent(item, min_outdent=' ')[1])
@@ -2201,7 +2226,7 @@ def _wavedrom_block_sub(self, match):
 
         return self._uniform_indent(
             '\n%s%s%s\n' % (open_tag, self._escape_table[waves], close_tag),
-            lead_indent, include_empty_lines=True
+            lead_indent, indent_empty_lines=True
         )
 
     def _do_wavedrom_blocks(self, text):
@@ -2612,13 +2637,16 @@ def _outdent(self, text):
         # Remove one level of line-leading tabs or spaces
         return self._outdent_re.sub('', text)
 
-    def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):
-        # Removes the smallest common leading indentation from each (non empty)
-        # line of `text` and returns said indent along with the outdented text.
-        # The `min_outdent` kwarg makes sure the smallest common whitespace
-        # must be at least this size
-        # The `max_outdent` sets the maximum amount a line can be
-        # outdented by
+    @staticmethod
+    def _uniform_outdent(text, min_outdent=None, max_outdent=None):
+        '''
+        Removes the smallest common leading indentation from each (non empty)
+        line of `text` and returns said indent along with the outdented text.
+
+        Args:
+            min_outdent: make sure the smallest common whitespace is at least this size
+            max_outdent: the maximum amount a line can be outdented by
+        '''
 
         # find the leading whitespace for every line
         whitespace = [
@@ -2652,11 +2680,26 @@ def _uniform_outdent(self, text, min_outdent=None, max_outdent=None):
 
         return outdent, ''.join(outdented)
 
-    def _uniform_indent(self, text, indent, include_empty_lines=False):
-        return ''.join(
-            (indent + line if line.strip() or include_empty_lines else '')
-            for line in text.splitlines(True)
-        )
+    @staticmethod
+    def _uniform_indent(text, indent, include_empty_lines=False, indent_empty_lines=False):
+        '''
+        Uniformly indent a block of text by a fixed amount
+
+        Args:
+            text: the text to indent
+            indent: a string containing the indent to apply
+            include_empty_lines: don't remove whitespace only lines
+            indent_empty_lines: indent whitespace only lines with the rest of the text
+        '''
+        blocks = []
+        for line in text.splitlines(True):
+            if line.strip() or indent_empty_lines:
+                blocks.append(indent + line)
+            elif include_empty_lines:
+                blocks.append(line)
+            else:
+                blocks.append('')
+        return ''.join(blocks)
 
     @staticmethod
     def _match_overlaps_substr(text, match, substr):

diff --git a/test/tm-cases/markdown_in_html_in_lists.html b/test/tm-cases/markdown_in_html_in_lists.html
@@ -0,0 +1,37 @@
+<ul>
+<li><p>Item 1</p>
+
+<div>
+
+<h6>Block one</h6>
+
+<p>Some text</p>
+
+</div></li>
+<li><p>Item 2</p>
+
+<ul>
+<li><p>Item 3</p>
+
+<ul>
+<li><p>Item 4</p>
+
+<div>
+
+<h6>Block two</h6>
+
+<p>Some text</p>
+
+</div></li>
+</ul></li>
+<li><p>Item 5</p>
+
+<div>
+
+<h6>Block three</h6>
+
+<p>Some text</p>
+
+</div></li>
+</ul></li>
+</ul>
diff --git a/test/tm-cases/markdown_in_html_in_lists.opts b/test/tm-cases/markdown_in_html_in_lists.opts
@@ -0,0 +1 @@
+{"extras": ["markdown-in-html"]}
diff --git a/test/tm-cases/markdown_in_html_in_lists.text b/test/tm-cases/markdown_in_html_in_lists.text
@@ -0,0 +1,17 @@
+- Item 1
+  <div markdown="1">
+  ###### Block one
+  Some text
+  </div>
+- Item 2
+  - Item 3
+    - Item 4
+      <div markdown="1">
+      ###### Block two
+      Some text
+      </div>
+  - Item 5
+    <div markdown="1">
+    ###### Block three
+    Some text
+    </div>
diff --git a/test/tm-cases/nested_list.html b/test/tm-cases/nested_list.html
@@ -34,3 +34,18 @@
 </ul></li>
 <li>Item 3 - yes! just a single item</li>
 </ul>
+
+<p>Other more different nested list:</p>
+
+<ul>
+<li><p>Item 1
+With some space after</p></li>
+<li><p>Item 2</p>
+
+<ul>
+<li>Item 3
+<ul>
+<li>Item 4</li>
+</ul></li>
+</ul></li>
+</ul>
diff --git a/test/tm-cases/nested_list.text b/test/tm-cases/nested_list.text
@@ -20,4 +20,14 @@ Slightly more nested list:
     + What
     + The
     + Code
-* Item 3 - yes! just a single item
+* Item 3 - yes! just a single item
+
+
+Other more different nested list:
+
+- Item 1
+  With some space after
+
+- Item 2
+  - Item 3
+    - Item 4
diff --git a/test/tm-cases/seperated_list_items.html b/test/tm-cases/seperated_list_items.html
@@ -0,0 +1,12 @@
+<ul>
+<li><p>Item 1
+ABCDEF</p></li>
+<li><p>Item 2</p>
+
+<ul>
+<li>Item 3
+<ul>
+<li>Item 4</li>
+</ul></li>
+</ul></li>
+</ul>
diff --git a/test/tm-cases/seperated_list_items.text b/test/tm-cases/seperated_list_items.text
@@ -0,0 +1,6 @@
+- Item 1
+  ABCDEF
+
+- Item 2
+  - Item 3
+    - Item 4