From e2734be0a527528a1450748c7a06cd973a9634b3 Mon Sep 17 00:00:00 2001
From: Sangyub Lee
Date: Sun, 2 Apr 2023 08:12:12 +0900
Subject: [PATCH] Fix XXE vulnerability for mathml parser

---
Reviewer notes (placed between the "---" separator and the diffstat, so they
are not part of the commit message):

* apply_xsl() now parses both the stylesheet and the MathML input with
  etree.XMLParser(resolve_entities=False) and builds the XSLT transform with
  access_control=etree.XSLTAccessControl.DENY_ALL.
* The apply_xsl() docstring is rewritten with "Parameters" / "Examples"
  headings, and c2p() gains an "Examples" heading.
* The new test_xxe() checks that sensitive.txt exists, skips when lxml is
  not importable, passes apply_xsl() a document that references the entity
  "&ent;", and asserts on the exact transformed output.
* lxml is added to the CI "pip install" line, presumably so that test_xxe()
  is not skipped there.
* NOTE(review): this copy of the patch looks lossy. Angle-bracketed text
  appears to have been stripped (no author address on the "From:" line, the
  doctest output and the test's XML strings contain no tags, and "path" is
  computed but never referenced in the visible f-string), and the original
  line breaks were collapsed. The layout below was reconstructed from the
  +/- markers and the @@ hunk line counts; indentation and the wrapping of
  context lines are best-effort. Regenerate the patch from commit
  e2734be0a527528a1450748c7a06cd973a9634b3 before applying it.

 .github/workflows/runtests.yml       |  2 +-
 sympy/utilities/mathml/__init__.py   | 31 +++++++++++++++++++-------
 sympy/utilities/tests/sensitive.txt  |  1 +
 sympy/utilities/tests/test_mathml.py | 33 ++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 9 deletions(-)
 create mode 100644 sympy/utilities/tests/sensitive.txt
 create mode 100644 sympy/utilities/tests/test_mathml.py

diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml
index 6b8b62b9fe9e..77b10d245a0c 100644
--- a/.github/workflows/runtests.yml
+++ b/.github/workflows/runtests.yml
@@ -199,7 +199,7 @@ jobs:
 
       # dependencies to install in all Python versions:
       - run: pip install mpmath numpy numexpr matplotlib ipython cython scipy \
-                         aesara wurlitzer autowrap pytest \
+                         aesara wurlitzer autowrap lxml pytest \
                          'antlr4-python3-runtime==4.11.*'
 
       # Not available in pypy or cpython 3.11 (yet).
diff --git a/sympy/utilities/mathml/__init__.py b/sympy/utilities/mathml/__init__.py
index 83a73fff96ae..9af4a8c23cd5 100644
--- a/sympy/utilities/mathml/__init__.py
+++ b/sympy/utilities/mathml/__init__.py
@@ -20,10 +20,19 @@ def add_mathml_headers(s):
 
 @doctest_depends_on(modules=('lxml',))
 def apply_xsl(mml, xsl):
-    """Apply a xsl to a MathML string
-    @param mml: a string with MathML code
-    @param xsl: a string representing a path to a xsl (xml stylesheet)
-        file. This file name is relative to the PYTHONPATH
+    """Apply a xsl to a MathML string.
+
+    Parameters
+    ==========
+
+    mml
+        A string with MathML code.
+    xsl
+        A string representing a path to a xsl (xml stylesheet) file.
+        This file name is relative to the PYTHONPATH.
+
+    Examples
+    ========
 
     >>> from sympy.utilities.mathml import apply_xsl
     >>> xsl = 'mathml/data/simple_mmlctop.xsl'
@@ -31,12 +40,15 @@ def apply_xsl(mml, xsl):
     >>> res = apply_xsl(mml,xsl)
     >>> ''.join(res.splitlines())
     ' a + b'
-
     """
     from lxml import etree
-    s = etree.XML(get_resource(xsl).read())
-    transform = etree.XSLT(s)
-    doc = etree.XML(mml)
+
+    parser = etree.XMLParser(resolve_entities=False)
+    ac = etree.XSLTAccessControl.DENY_ALL
+
+    s = etree.XML(get_resource(xsl).read(), parser=parser)
+    transform = etree.XSLT(s, access_control=ac)
+    doc = etree.XML(mml, parser=parser)
     result = transform(doc)
     s = str(result)
     return s
@@ -48,6 +60,9 @@ def c2p(mml, simple=False):
     in one document in MathML presentation, more suitable for printing, and more
     widely accepted
 
+    Examples
+    ========
+
     >>> from sympy.utilities.mathml import c2p
     >>> mml = ' 2 '
     >>> c2p(mml,simple=True) != c2p(mml,simple=False)
diff --git a/sympy/utilities/tests/sensitive.txt b/sympy/utilities/tests/sensitive.txt
new file mode 100644
index 000000000000..7c2317a55325
--- /dev/null
+++ b/sympy/utilities/tests/sensitive.txt
@@ -0,0 +1 @@
+USERNAME, PASSWORD
diff --git a/sympy/utilities/tests/test_mathml.py b/sympy/utilities/tests/test_mathml.py
new file mode 100644
index 000000000000..b5b803f40282
--- /dev/null
+++ b/sympy/utilities/tests/test_mathml.py
@@ -0,0 +1,33 @@
+import os
+from textwrap import dedent
+from sympy.external import import_module
+from sympy.testing.pytest import skip
+from sympy.utilities.mathml import apply_xsl
+
+
+
+lxml = import_module('lxml')
+
+path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'sensitive.txt'))
+
+
+def test_xxe():
+    assert os.path.isfile(path)
+    if not lxml:
+        skip("lxml not installed.")
+
+    mml = dedent(
+        rf"""
+
+        ]>
+
+        John
+        &ent;
+
+        """
+    )
+    xsl = 'mathml/data/simple_mmlctop.xsl'
+
+    res = apply_xsl(mml, xsl)
+    assert res == \
+        '\n\nJohn\n\n\n'