diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi index c16627629..9fae9fb12 100644 --- a/src/lxml/apihelpers.pxi +++ b/src/lxml/apihelpers.pxi @@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node): while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: c_ns = c_node.nsDef while c_ns is not NULL: - prefix = funicodeOrNone(c_ns.prefix) - if prefix not in nsmap: - nsmap[prefix] = funicodeOrNone(c_ns.href) + if c_ns.prefix or c_ns.href: + prefix = funicodeOrNone(c_ns.prefix) + if prefix not in nsmap: + nsmap[prefix] = funicodeOrNone(c_ns.href) c_ns = c_ns.next c_node = c_node.parent return nsmap diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi index 138c23a6a..a7299da6d 100644 --- a/src/lxml/iterparse.pxi +++ b/src/lxml/iterparse.pxi @@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node): count = 0 c_ns = c_node.nsDef while c_ns is not NULL: - count += 1 + count += (c_ns.href is not NULL) c_ns = c_ns.next return count @@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1: count = 0 c_ns = c_node.nsDef while c_ns is not NULL: - ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '', - funicode(c_ns.href)) - event_list.append( (u"start-ns", ns_tuple) ) - count += 1 + if c_ns.href: + ns_tuple = (funicodeOrEmpty(c_ns.prefix), + funicode(c_ns.href)) + event_list.append( (u"start-ns", ns_tuple) ) + count += 1 c_ns = c_ns.next return count diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index e5f084692..285313f6e 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -1460,6 +1460,26 @@ def test_iterwalk_getiterator(self): [1,2,1,4], counts) + def test_walk_after_parse_failure(self): + # This used to be an issue because libxml2 can leak empty namespaces + # between failed parser runs. iterwalk() failed to handle such a tree. + try: + etree.XML('''''') + except etree.XMLSyntaxError: + pass + else: + assert False, "invalid input did not fail to parse" + + et = etree.XML(''' ''') + try: + ns = next(etree.iterwalk(et, events=('start-ns',))) + except StopIteration: + # This would be the expected result, because there was no namespace + pass + else: + # This is a bug in libxml2 + assert not ns, repr(ns) + def test_itertext_comment_pi(self): # https://bugs.launchpad.net/lxml/+bug/1844674 XML = self.etree.XML