Skip to content

Commit

Permalink
Fix a crash when incorrect parser input occurs together with usages of iterwalk() on trees generated by the same parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
scoder committed Jul 1, 2022
1 parent 50c2764 commit 86368e9
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 8 deletions.
7 changes: 4 additions & 3 deletions src/lxml/apihelpers.pxi
Expand Up @@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
c_ns = c_node.nsDef
while c_ns is not NULL:
prefix = funicodeOrNone(c_ns.prefix)
if prefix not in nsmap:
nsmap[prefix] = funicodeOrNone(c_ns.href)
if c_ns.prefix or c_ns.href:
prefix = funicodeOrNone(c_ns.prefix)
if prefix not in nsmap:
nsmap[prefix] = funicodeOrNone(c_ns.href)
c_ns = c_ns.next
c_node = c_node.parent
return nsmap
Expand Down
11 changes: 6 additions & 5 deletions src/lxml/iterparse.pxi
Expand Up @@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
count = 0
c_ns = c_node.nsDef
while c_ns is not NULL:
count += 1
count += (c_ns.href is not NULL)
c_ns = c_ns.next
return count

Expand All @@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
count = 0
c_ns = c_node.nsDef
while c_ns is not NULL:
ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
funicode(c_ns.href))
event_list.append( (u"start-ns", ns_tuple) )
count += 1
if c_ns.href:
ns_tuple = (funicodeOrEmpty(c_ns.prefix),
funicode(c_ns.href))
event_list.append( (u"start-ns", ns_tuple) )
count += 1
c_ns = c_ns.next
return count
20 changes: 20 additions & 0 deletions src/lxml/tests/test_etree.py
Expand Up @@ -1460,6 +1460,26 @@ def test_iterwalk_getiterator(self):
[1,2,1,4],
counts)

def test_walk_after_parse_failure(self):
    """Check that iterwalk() copes with a tree parsed after a failed parse.

    libxml2 can leak empty namespaces between failed parser runs, and
    iterwalk() used to be unable to handle a tree affected by that.
    """
    # First provoke a parse failure with an unterminated, namespaced element.
    parse_failed = False
    try:
        etree.XML('''<anot xmlns="1">''')
    except etree.XMLSyntaxError:
        parse_failed = True
    assert parse_failed, "invalid input did not fail to parse"

    # Then parse a namespace-free document and ask only for 'start-ns' events.
    root = etree.XML('''<root> </root>''')
    try:
        first_event = next(etree.iterwalk(root, events=('start-ns',)))
    except StopIteration:
        # The expected outcome: the document declares no namespace at all.
        return

    # Receiving any event here is a libxml2 bug; at least it must be empty.
    assert not first_event, repr(first_event)

def test_itertext_comment_pi(self):
# https://bugs.launchpad.net/lxml/+bug/1844674
XML = self.etree.XML
Expand Down

2 comments on commit 86368e9

@zhuofeng6
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would this cause a breakdown?

@scoder
Copy link
Member Author

@scoder scoder commented on 86368e9 Aug 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.