From 22946b0729831f51f6e01a2f2ae34114eb087efa Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 17 Feb 2024 14:58:36 +0000 Subject: [PATCH 1/2] Process HTML comments as markdown in 'escape' safe mode --- lib/markdown2.py | 23 +++++++++++++++---- .../escape_html_comments_safe_mode.html | 3 +++ .../escape_html_comments_safe_mode.opts | 1 + .../escape_html_comments_safe_mode.text | 3 +++ 4 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 test/tm-cases/escape_html_comments_safe_mode.html create mode 100644 test/tm-cases/escape_html_comments_safe_mode.opts create mode 100644 test/tm-cases/escape_html_comments_safe_mode.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 3a1db281..3536a81f 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1264,15 +1264,30 @@ def _is_code_span(index, token): return re.match(r'md5-[A-Fa-f0-9]{32}', ''.join(peek_tokens)) + def _is_comment(token): + if self.safe_mode == 'replace': + # don't bother processing each section of comment in replace mode. 
Just do the whole thing + return + return re.match(r'(<!--)(.*)(-->)', token) + + def _hash(token): + key = _hash_text(token) + self.html_spans[key] = token + return key + tokens = [] split_tokens = self._sorta_html_tokenize_re.split(text) is_html_markup = False for index, token in enumerate(split_tokens): if is_html_markup and not _is_auto_link(token) and not _is_code_span(index, token): - sanitized = self._sanitize_html(token) - key = _hash_text(sanitized) - self.html_spans[key] = sanitized - tokens.append(key) + is_comment = _is_comment(token) + if is_comment: + tokens.append(_hash(self._sanitize_html(is_comment.group(1)))) + # sanitise but leave comment body intact for further markdown processing + tokens.append(self._sanitize_html(is_comment.group(2))) + tokens.append(_hash(self._sanitize_html(is_comment.group(3)))) + else: + tokens.append(_hash(self._sanitize_html(token))) else: tokens.append(self._encode_incomplete_tags(token)) is_html_markup = not is_html_markup diff --git a/test/tm-cases/escape_html_comments_safe_mode.html b/test/tm-cases/escape_html_comments_safe_mode.html new file mode 100644 index 00000000..9b2b2bc9 --- /dev/null +++ b/test/tm-cases/escape_html_comments_safe_mode.html @@ -0,0 +1,3 @@ +

foo <!-- bar

+ +

foo <!-- bar -->

diff --git a/test/tm-cases/escape_html_comments_safe_mode.opts b/test/tm-cases/escape_html_comments_safe_mode.opts new file mode 100644 index 00000000..f15d59b8 --- /dev/null +++ b/test/tm-cases/escape_html_comments_safe_mode.opts @@ -0,0 +1 @@ +{'safe_mode': 'escape'} diff --git a/test/tm-cases/escape_html_comments_safe_mode.text b/test/tm-cases/escape_html_comments_safe_mode.text new file mode 100644 index 00000000..c5f3eef2 --- /dev/null +++ b/test/tm-cases/escape_html_comments_safe_mode.text @@ -0,0 +1,3 @@ +*foo* From 2390b8733954da4b5519d0eb42dfb7a7f6464bbd Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 17 Feb 2024 15:04:15 +0000 Subject: [PATCH 2/2] Update changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 6430e856..b1731a43 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ - [pull #519] Add support for custom extras - [pull #519] Drop Python 3.5 support +- [pull #569] Process HTML comments as markdown in 'escape' safe mode ## python-markdown2 2.4.13