Skip to content

Commit

Permalink
Do not parse Markdown/HTML in name or url fields
Browse files (browse the repository at this point in the history)
Closes #151.
  • Loading branch information
ctrueden committed May 8, 2024
1 parent 1464efa commit 226e102
Showing 1 changed file with 11 additions and 8 deletions.
19 changes: 11 additions & 8 deletions generate-legacy-pages.py
Expand Up @@ -5,13 +5,14 @@

# NB: Avoid annoying BeautifulSoup warnings of the following kind:
#
# UserWarning: "https://.../" looks like a URL. Beautiful Soup is not an
# HTTP client. You should probably use an HTTP client like requests to get
# the document behind the URL, and feed that document to Beautiful Soup.
# MarkupResemblesLocatorWarning: The input looks more like a URL than markup.
# You may want to use an HTTP client like requests to get the document behind
# the URL, and feed that document to Beautiful Soup.
#
# See: https://stackoverflow.com/a/41496131/1207769
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')
from bs4 import MarkupResemblesLocatorWarning
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)

def html(markdown_string):
s = markdown2.markdown(markdown_string).rstrip()
Expand Down Expand Up @@ -39,8 +40,9 @@ def plain(html_string):

# Render sites.xml from the sites data structure.
xml_data = xml_template.render(sites=[{
'name': escape(plain(html(site['name']))),
'url': escape(plain(html(site['url']))),
# NB: No Markdown or HTML allowed in name or url!
'name': site['name'],
'url': site['url'],
'description': escape(plain(html(site['description']))),
'maintainer': escape(', '.join([plain(html(m)) for m in site['maintainers']]))
} for site in sites['sites']], date=date, time=time)
Expand All @@ -65,8 +67,9 @@ def plain(html_string):

# Render sites.html from the sites data structure.
result = html_template.render(sites=[{
'name': html(site['name']),
'url': html(site['url']),
# NB: No Markdown or HTML allowed in name or url!
'name': site['name'],
'url': site['url'],
'description': html(site['description']),
'maintainer': ', '.join([html(m) for m in site['maintainers']])
} for site in sites['sites']], date=date, time=time)
Expand Down

0 comments on commit 226e102

Please sign in to comment.