From 5d83f1e8482ca304db8be726cad89554c88f136a Mon Sep 17 00:00:00 2001
From: John Bauer
Date: Thu, 7 Oct 2021 12:38:42 -0700
Subject: [PATCH] Attempt to prevent external document attacks by wrapping
 DocumentBuilderFactory with a bunch of attribute changes

---
 src/edu/stanford/nlp/time/XMLUtils.java |  5 +--
 src/edu/stanford/nlp/util/XMLUtils.java | 43 +++++++++++++++++++++----
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/src/edu/stanford/nlp/time/XMLUtils.java b/src/edu/stanford/nlp/time/XMLUtils.java
index 2efa170671..9a52e9eb35 100644
--- a/src/edu/stanford/nlp/time/XMLUtils.java
+++ b/src/edu/stanford/nlp/time/XMLUtils.java
@@ -1,6 +1,7 @@
 package edu.stanford.nlp.time;
 
 import edu.stanford.nlp.io.StringOutputStream;
+import static edu.stanford.nlp.util.XMLUtils.safeDocumentBuilderFactory;
 import org.w3c.dom.*;
 
 import javax.xml.parsers.DocumentBuilder;
@@ -63,7 +64,7 @@ public static void printNode(OutputStream out, Node node, boolean prettyPrint, b
 
   public static Document createDocument() {
     try {
-      DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbFactory = safeDocumentBuilderFactory();
       DocumentBuilder docBuilder = dbFactory.newDocumentBuilder();
       Document doc = docBuilder.newDocument();
       return doc;
@@ -82,7 +83,7 @@ public static Element createElement(String tag) {
 
   public static Element parseElement(String xml) {
     try {
-      DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbFactory = safeDocumentBuilderFactory();
       DocumentBuilder docBuilder = dbFactory.newDocumentBuilder();
       Document doc = docBuilder.parse(new ByteArrayInputStream(xml.getBytes()));
       return doc.getDocumentElement();
diff --git a/src/edu/stanford/nlp/util/XMLUtils.java b/src/edu/stanford/nlp/util/XMLUtils.java
index a1556f34f5..520990477b 100644
--- a/src/edu/stanford/nlp/util/XMLUtils.java
+++ b/src/edu/stanford/nlp/util/XMLUtils.java
@@ -39,6 +39,34 @@ public class XMLUtils {
 
   private XMLUtils() {} // only static methods
 
+  /**
+   * Creates a DocumentBuilderFactory configured to refuse DOCTYPE declarations
+   * and external entities.  Each feature is applied on its own, so a parser that
+   * rejects one feature still receives all of the others.
+   *
+   * @return a new factory; any feature the parser rejected is logged and skipped
+   */
+  public static DocumentBuilderFactory safeDocumentBuilderFactory() {
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+    setFeatureOrWarn(dbf, "http://apache.org/xml/features/disallow-doctype-decl", true);
+    setFeatureOrWarn(dbf, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+    setFeatureOrWarn(dbf, "http://xml.org/sax/features/external-general-entities", false);
+    setFeatureOrWarn(dbf, "http://xml.org/sax/features/external-parameter-entities", false);
+    setFeatureOrWarn(dbf, "http://apache.org/xml/features/dom/create-entity-ref-nodes", false);
+    setFeatureOrWarn(dbf, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    return dbf;
+  }
+
+  /** Sets a single parser feature, logging instead of propagating a failure. */
+  private static void setFeatureOrWarn(DocumentBuilderFactory dbf, String feature, boolean value) {
+    try {
+      dbf.setFeature(feature, value);
+    } catch (ParserConfigurationException e) {
+      log.warn(e);
+    }
+  }
+
+
   /**
    * Returns the text content of all nodes in the given file with the given tag.
    *
@@ -68,7 +96,7 @@ private static List getTextContentFromTagsFromFileSAXException(
       File f, String tag) throws SAXException {
     List sents = Generics.newArrayList();
     try {
-      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbf = safeDocumentBuilderFactory();
       DocumentBuilder db = dbf.newDocumentBuilder();
       Document doc = db.parse(f);
       doc.getDocumentElement().normalize();
@@ -129,7 +157,7 @@ private static List getTagElementsFromFileSAXException(
       File f, String tag) throws SAXException {
     List sents = Generics.newArrayList();
     try {
-      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbf = safeDocumentBuilderFactory();
       DocumentBuilder db = dbf.newDocumentBuilder();
       Document doc = db.parse(f);
       doc.getDocumentElement().normalize();
@@ -207,7 +235,7 @@ public static List> getTagElementTriplesFromFile
       File f, String tag, int numIncludedSiblings) throws SAXException {
     List> sents = Generics.newArrayList();
     try {
-      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbf = safeDocumentBuilderFactory();
       DocumentBuilder db = dbf.newDocumentBuilder();
       Document doc = db.parse(f);
       doc.getDocumentElement().normalize();
@@ -251,7 +279,7 @@ public static List> getTagElementTriplesFromFile
   public static DocumentBuilder getXmlParser() {
     DocumentBuilder db = null;
     try {
-      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbf = safeDocumentBuilderFactory();
       dbf.setValidating(false);
 
       //Disable DTD loading and validation
@@ -283,7 +311,7 @@ public static DocumentBuilder getXmlParser() {
   public static DocumentBuilder getValidatingXmlParser(File schemaFile) {
     DocumentBuilder db = null;
     try {
-      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      DocumentBuilderFactory dbf = safeDocumentBuilderFactory();
 
       SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
       Schema schema = factory.newSchema(schemaFile);
@@ -1206,7 +1234,8 @@ public static XMLTag parseTag(String tagString) {
 
   public static Document readDocumentFromFile(String filename) throws Exception {
     InputSource in = new InputSource(new FileReader(filename));
-    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+    DocumentBuilderFactory factory = safeDocumentBuilderFactory();
+
     factory.setNamespaceAware(false);
     DocumentBuilder db = factory.newDocumentBuilder();
     db.setErrorHandler(new SAXErrorHandler());
@@ -1256,7 +1285,7 @@ public void fatalError(SAXParseException ex) throws SAXParseException {
 
   public static Document readDocumentFromString(String s) throws Exception {
     InputSource in = new InputSource(new StringReader(s));
-    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+    DocumentBuilderFactory factory = safeDocumentBuilderFactory();
     factory.setNamespaceAware(false);
     return factory.newDocumentBuilder().parse(in);
   }