Skip to content

Commit

Permalink
Merge pull request #527 from Crozzers/fix-base64-images
Browse the repository at this point in the history
Fix base64 images being corrupted in safe mode (issue #526)
  • Loading branch information
nicholasserra committed Sep 12, 2023
2 parents c94e417 + a2bab99 commit 2ef5653
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -3,6 +3,7 @@
## python-markdown2 2.4.11 (not yet released)

- [pull #524] Fix angles being escaped in style blocks (issue #523)
- [pull #527] Fix base64 images being corrupted in safe mode (issue #526)


## python-markdown2 2.4.10
Expand Down
32 changes: 28 additions & 4 deletions lib/markdown2.py
Expand Up @@ -1499,13 +1499,30 @@ def _extract_url_and_title(self, text, start):
url = self._strip_anglebrackets.sub(r'\1', url)
return url, title, end_idx

# Verbose-mode (re.X) pattern describing a `data:` URL. It is applied with
# `.match()`, so it only recognises URLs that begin with the `data:` scheme.
# Named groups:
#   mime  - optional media type, `type/subtype` plus at most one
#           `;parameter=value` pair
#   token - optional literal `;base64` marker
#   data  - everything after the separating comma (`.` does not cross
#           newlines because re.S is not set)
# References:
# https://developer.mozilla.org/en-US/docs/web/http/basics_of_http/data_urls
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types
_data_url_re = re.compile(r'''
data:
# in format type/subtype;parameter=optional
(?P<mime>\w+/[\w+\.-]+(?:;\w+=[\w+\.-]+)?)?
# optional base64 token
(?P<token>;base64)?
,(?P<data>.*)
''', re.X)

def _protect_url(self, url):
    '''
    Escape a URL and replace it with an opaque hash key.

    The URL is passed through `_html_escape_url` to remove any nasty characters,
    and the now "safe" URL is hashed to prevent other safety mechanisms from
    tampering with it (eg: escaping "&" in URL parameters).

    Base64 image data URLs are detected on the *raw* URL, before any escaping,
    so that the '+' characters of the base64 payload are not rewritten in
    safe mode.

    Args:
        url: the raw URL text taken from the document

    Returns:
        The hash key standing in for the escaped URL. The escaped URL is
        recorded in `self._escape_table` under that key's mapping.
    '''
    # Detection must happen before escaping: once '+' has been replaced the
    # payload is already corrupted and the exemption below would be pointless.
    data_url = self._data_url_re.match(url)
    charset = None
    if data_url is not None:
        mime = data_url.group('mime') or ''
        if mime.startswith('image/') and data_url.group('token') == ';base64':
            charset = 'base64'
    # Escape exactly once, telling the escaper which charset to leave intact.
    url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset)
    key = _hash_text(url)
    self._escape_table[url] = key
    return key
Expand Down Expand Up @@ -3045,14 +3062,21 @@ def _xml_encode_email_char_at_random(ch):
return '&#%s;' % ord(ch)


def _html_escape_url(attr, safe_mode=False, charset=None):
    """
    Replace special characters that are potentially malicious in url string.

    Double quotes and angle brackets are always replaced with HTML entities.
    In safe mode, single quotes are also replaced with an entity and '+' is
    replaced with a space, unless the charset exemption below applies.

    Args:
        attr: the URL string to escape
        safe_mode: apply the additional safe-mode replacements when truthy
        charset: don't escape characters from this charset. Currently the only
            exception is for '+' when charset=='base64'

    Returns:
        The escaped URL string.
    """
    escaped = (attr
        .replace('"', '&quot;')
        .replace('<', '&lt;')
        .replace('>', '&gt;'))
    if safe_mode:
        # '+' is part of the base64 alphabet; rewriting it would corrupt the
        # payload of a base64 data URL, so it is only replaced otherwise.
        if charset != 'base64':
            escaped = escaped.replace('+', ' ')
        escaped = escaped.replace("'", "&#39;")
    return escaped

Expand Down
3 changes: 3 additions & 0 deletions test/tm-cases/data_urls_in_safe_mode.html
@@ -0,0 +1,3 @@
<p><img src="" alt="smiley" /></p>

<p><a href="#">smiley</a></p>
1 change: 1 addition & 0 deletions test/tm-cases/data_urls_in_safe_mode.opts
@@ -0,0 +1 @@
{'safe_mode': True}
3 changes: 3 additions & 0 deletions test/tm-cases/data_urls_in_safe_mode.text
@@ -0,0 +1,3 @@
![smiley]()

[smiley]()

0 comments on commit 2ef5653

Please sign in to comment.