lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject lucene-solr:branch_7_4: SOLR-12450: Don't allow referal to external resources in various config files
Date Sun, 17 Jun 2018 11:27:57 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7_4 518d30350 -> 3aa6086ed


SOLR-12450: Don't allow referal to external resources in various config files

# Conflicts:
#	solr/CHANGES.txt

# Conflicts:
#	solr/CHANGES.txt


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3aa6086e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3aa6086e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3aa6086e

Branch: refs/heads/branch_7_4
Commit: 3aa6086ed99fa7158d423dc7c33dae6da466b093
Parents: 518d303
Author: Uwe Schindler <uschindler@apache.org>
Authored: Sun Jun 10 11:47:22 2018 +0200
Committer: Uwe Schindler <uschindler@apache.org>
Committed: Sun Jun 17 13:27:39 2018 +0200

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../handler/extraction/ParseContextConfig.java  |  12 +-
 .../apache/solr/schema/AbstractEnumField.java   | 101 +++++++---------
 .../solr/schema/FileExchangeRateProvider.java   | 106 +++++++---------
 .../org/apache/solr/util/SafeXMLParsing.java    | 120 +++++++++++++++++++
 .../apache/solr/util/TestSafeXMLParsing.java    |  99 +++++++++++++++
 6 files changed, 313 insertions(+), 128 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3aa6086e/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c78d83c..7ac0e07 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -297,6 +297,9 @@ Bug Fixes
 * SOLR-12416: When creating a time routed alias, the router.autoDeleteAge option wasn't considered.
   (Joachim Sauer via David Smiley)
 
+* SOLR-12450: Don't allow referal to external resources in various config files.
+  (Yuyang Xiao, Uwe Schindler)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3aa6086e/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ParseContextConfig.java
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ParseContextConfig.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ParseContextConfig.java
index 8b9f1ef..b9f58f0 100644
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ParseContextConfig.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ParseContextConfig.java
@@ -16,20 +16,22 @@
  */
 package org.apache.solr.handler.extraction;
 
-import javax.xml.parsers.DocumentBuilderFactory;
 import java.beans.BeanInfo;
 import java.beans.Introspector;
 import java.beans.PropertyDescriptor;
 import java.beans.PropertyEditor;
 import java.beans.PropertyEditorManager;
-import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
 import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
 
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.util.SafeXMLParsing;
 import org.apache.tika.parser.ParseContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.NamedNodeMap;
@@ -37,6 +39,8 @@ import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 public class ParseContextConfig {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  
   private final Map<Class<?>, Object> entries = new HashMap<>();
 
   /** Creates an empty Config without any settings (used as placeholder). */
@@ -54,9 +58,7 @@ public class ParseContextConfig {
   }
   
   private static Document loadConfigFile(SolrResourceLoader resourceLoader, String parseContextConfigLoc) throws Exception {
-    try (InputStream in = resourceLoader.openResource(parseContextConfigLoc)) {
-      return DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in, parseContextConfigLoc);
-    }
+    return SafeXMLParsing.parseConfigXML(log, resourceLoader, parseContextConfigLoc);
   }
 
   private void extract(Element element, SolrResourceLoader loader) throws Exception {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3aa6086e/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java b/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java
index 06f3c32..10060d7 100644
--- a/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java
+++ b/solr/core/src/java/org/apache/solr/schema/AbstractEnumField.java
@@ -18,14 +18,12 @@
 package org.apache.solr.schema;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.lang.invoke.MethodHandles;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
+
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpressionException;
@@ -38,8 +36,10 @@ import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.solr.common.EnumFieldValue;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
+import org.apache.solr.util.SafeXMLParsing;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -51,7 +51,6 @@ import org.xml.sax.SAXException;
  * Abstract Field type for support of string values with custom sort order.
  */
 public abstract class AbstractEnumField extends PrimitiveFieldType {
-  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   protected EnumMapping enumMapping;
   
   @Override
@@ -69,6 +68,8 @@ public abstract class AbstractEnumField extends PrimitiveFieldType {
    * @lucene.internal
    */
   public static final class EnumMapping {
+    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
     public static final String PARAM_ENUMS_CONFIG = "enumsConfig";
     public static final String PARAM_ENUM_NAME = "enumName";
     public static final Integer DEFAULT_VALUE = -1;
@@ -105,67 +106,49 @@ public abstract class AbstractEnumField extends PrimitiveFieldType {
                                 ftName + ": No enum name was configured.");
       }
       
-      InputStream is = null;
-      
+      final SolrResourceLoader loader = schema.getResourceLoader(); 
       try {
-        is = schema.getResourceLoader().openResource(enumsConfigFile);
-        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
-        try {
-          final Document doc = dbf.newDocumentBuilder().parse(is);
-          final XPathFactory xpathFactory = XPathFactory.newInstance();
-          final XPath xpath = xpathFactory.newXPath();
-          final String xpathStr = String.format(Locale.ROOT, "/enumsConfig/enum[@name='%s']", enumName);
-          final NodeList nodes = (NodeList) xpath.evaluate(xpathStr, doc, XPathConstants.NODESET);
-          final int nodesLength = nodes.getLength();
-          if (nodesLength == 0) {
-            String exceptionMessage = String.format
-              (Locale.ENGLISH, "%s: No enum configuration found for enum '%s' in %s.",
+        log.debug("Reloading enums config file from {}", enumsConfigFile);
+        Document doc = SafeXMLParsing.parseConfigXML(log, loader, enumsConfigFile);
+        final XPathFactory xpathFactory = XPathFactory.newInstance();
+        final XPath xpath = xpathFactory.newXPath();
+        final String xpathStr = String.format(Locale.ROOT, "/enumsConfig/enum[@name='%s']", enumName);
+        final NodeList nodes = (NodeList) xpath.evaluate(xpathStr, doc, XPathConstants.NODESET);
+        final int nodesLength = nodes.getLength();
+        if (nodesLength == 0) {
+          String exceptionMessage = String.format
+            (Locale.ENGLISH, "%s: No enum configuration found for enum '%s' in %s.",
+             ftName, enumName, enumsConfigFile);
+          throw new SolrException(SolrException.ErrorCode.NOT_FOUND, exceptionMessage);
+        }
+        if (nodesLength > 1) {
+          if (log.isWarnEnabled())
+            log.warn("{}: More than one enum configuration found for enum '{}' in {}. The last one was taken.",
+                     ftName, enumName, enumsConfigFile);
+        }
+        final Node enumNode = nodes.item(nodesLength - 1);
+        final NodeList valueNodes = (NodeList) xpath.evaluate("value", enumNode, XPathConstants.NODESET);
+        for (int i = 0; i < valueNodes.getLength(); i++) {
+          final Node valueNode = valueNodes.item(i);
+          final String valueStr = valueNode.getTextContent();
+          if ((valueStr == null) || (valueStr.length() == 0)) {
+            final String exceptionMessage = String.format
+              (Locale.ENGLISH, "%s: A value was defined with an no value in enum '%s' in %s.",
                ftName, enumName, enumsConfigFile);
-            throw new SolrException(SolrException.ErrorCode.NOT_FOUND, exceptionMessage);
-          }
-          if (nodesLength > 1) {
-            if (log.isWarnEnabled())
-              log.warn("{}: More than one enum configuration found for enum '{}' in {}. The last one was taken.",
-                       ftName, enumName, enumsConfigFile);
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage);
           }
-          final Node enumNode = nodes.item(nodesLength - 1);
-          final NodeList valueNodes = (NodeList) xpath.evaluate("value", enumNode, XPathConstants.NODESET);
-          for (int i = 0; i < valueNodes.getLength(); i++) {
-            final Node valueNode = valueNodes.item(i);
-            final String valueStr = valueNode.getTextContent();
-            if ((valueStr == null) || (valueStr.length() == 0)) {
-              final String exceptionMessage = String.format
-                (Locale.ENGLISH, "%s: A value was defined with an no value in enum '%s' in %s.",
-                 ftName, enumName, enumsConfigFile);
-              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage);
-            }
-            if (enumStringToIntMap.containsKey(valueStr)) {
-              final String exceptionMessage = String.format
-                (Locale.ENGLISH, "%s: A duplicated definition was found for value '%s' in enum '%s' in %s.",
-                 ftName, valueStr, enumName, enumsConfigFile);
-              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage);
-            }
-            enumIntToStringMap.put(i, valueStr);
-            enumStringToIntMap.put(valueStr, i);
+          if (enumStringToIntMap.containsKey(valueStr)) {
+            final String exceptionMessage = String.format
+              (Locale.ENGLISH, "%s: A duplicated definition was found for value '%s' in enum '%s' in %s.",
+               ftName, valueStr, enumName, enumsConfigFile);
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, exceptionMessage);
           }
+          enumIntToStringMap.put(i, valueStr);
+          enumStringToIntMap.put(valueStr, i);
         }
-        catch (ParserConfigurationException | XPathExpressionException | SAXException e) {
-          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-                                  ftName + ": Error parsing enums config.", e);
-        }
-      }
-      catch (IOException e) {
+      } catch (IOException | SAXException | XPathExpressionException e) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-                                ftName + ": Error while opening enums config.", e);
-      } finally {
-        try {
-          if (is != null) {
-            is.close();
-          }
-        }
-        catch (IOException e) {
-          e.printStackTrace();
-        }
+                                ftName + ": Error while parsing enums config.", e);
       }
       
       if ((enumStringToIntMap.size() == 0) || (enumIntToStringMap.size() == 0)) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3aa6086e/solr/core/src/java/org/apache/solr/schema/FileExchangeRateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/FileExchangeRateProvider.java b/solr/core/src/java/org/apache/solr/schema/FileExchangeRateProvider.java
index 0148617..4836764 100644
--- a/solr/core/src/java/org/apache/solr/schema/FileExchangeRateProvider.java
+++ b/solr/core/src/java/org/apache/solr/schema/FileExchangeRateProvider.java
@@ -17,22 +17,21 @@
 
 package org.apache.solr.schema;
 
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
 import java.io.IOException;
-import java.io.InputStream;
 import java.lang.invoke.MethodHandles;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.util.SafeXMLParsing;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -46,6 +45,7 @@ import org.xml.sax.SAXException;
  */
 class FileExchangeRateProvider implements ExchangeRateProvider {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
   protected static final String PARAM_CURRENCY_CONFIG       = "currencyConfig";
 
   // Exchange rate map, maps Currency Code -> Currency Code -> Rate
@@ -159,71 +159,49 @@ class FileExchangeRateProvider implements ExchangeRateProvider {
 
   @Override
   public boolean reload() throws SolrException {
-    InputStream is = null;
     Map<String, Map<String, Double>> tmpRates = new HashMap<>();
+    log.debug("Reloading exchange rates from file {}", currencyConfigFile);
+
     try {
-      log.debug("Reloading exchange rates from file "+this.currencyConfigFile);
-
-      is = loader.openResource(currencyConfigFile);
-      javax.xml.parsers.DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
-      try {
-        dbf.setXIncludeAware(true);
-        dbf.setNamespaceAware(true);
-      } catch (UnsupportedOperationException e) {
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", e);
-      }
+      Document doc = SafeXMLParsing.parseConfigXML(log, loader, currencyConfigFile);
+      XPathFactory xpathFactory = XPathFactory.newInstance();
+      XPath xpath = xpathFactory.newXPath();
+      
+      // Parse exchange rates.
+      NodeList nodes = (NodeList) xpath.evaluate("/currencyConfig/rates/rate", doc, XPathConstants.NODESET);
       
-      try {
-        Document doc = dbf.newDocumentBuilder().parse(is);
-        XPathFactory xpathFactory = XPathFactory.newInstance();
-        XPath xpath = xpathFactory.newXPath();
+      for (int i = 0; i < nodes.getLength(); i++) {
+        Node rateNode = nodes.item(i);
+        NamedNodeMap attributes = rateNode.getAttributes();
+        Node from = attributes.getNamedItem("from");
+        Node to = attributes.getNamedItem("to");
+        Node rate = attributes.getNamedItem("rate");
+        
+        if (from == null || to == null || rate == null) {
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Exchange rate missing attributes (required: from, to, rate) " + rateNode);
+        }
         
-        // Parse exchange rates.
-        NodeList nodes = (NodeList) xpath.evaluate("/currencyConfig/rates/rate", doc, XPathConstants.NODESET);
+        String fromCurrency = from.getNodeValue();
+        String toCurrency = to.getNodeValue();
+        Double exchangeRate;
         
-        for (int i = 0; i < nodes.getLength(); i++) {
-          Node rateNode = nodes.item(i);
-          NamedNodeMap attributes = rateNode.getAttributes();
-          Node from = attributes.getNamedItem("from");
-          Node to = attributes.getNamedItem("to");
-          Node rate = attributes.getNamedItem("rate");
-          
-          if (from == null || to == null || rate == null) {
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Exchange rate missing attributes (required: from, to, rate) " + rateNode);
-          }
-          
-          String fromCurrency = from.getNodeValue();
-          String toCurrency = to.getNodeValue();
-          Double exchangeRate;
-          
-          if (null == CurrencyFieldType.getCurrency(fromCurrency)) {
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Specified 'from' currency not supported in this JVM: " + fromCurrency);
-          }
-          if (null == CurrencyFieldType.getCurrency(toCurrency)) {
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Specified 'to' currency not supported in this JVM: " + toCurrency);
-          }
-          
-          try {
-            exchangeRate = Double.parseDouble(rate.getNodeValue());
-          } catch (NumberFormatException e) {
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not parse exchange rate: " + rateNode, e);
-          }
-          
-          addRate(tmpRates, fromCurrency, toCurrency, exchangeRate);
+        if (null == CurrencyFieldType.getCurrency(fromCurrency)) {
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Specified 'from' currency not supported in this JVM: " + fromCurrency);
         }
-      } catch (SAXException | XPathExpressionException | ParserConfigurationException | IOException e) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error parsing currency config.", e);
-      }
-    } catch (IOException e) {
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while opening Currency configuration file "+currencyConfigFile, e);
-    } finally {
-      try {
-        if (is != null) {
-          is.close();
+        if (null == CurrencyFieldType.getCurrency(toCurrency)) {
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Specified 'to' currency not supported in this JVM: " + toCurrency);
+        }
+        
+        try {
+          exchangeRate = Double.parseDouble(rate.getNodeValue());
+        } catch (NumberFormatException e) {
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not parse exchange rate: " + rateNode, e);
         }
-      } catch (IOException e) {
-        e.printStackTrace();
+        
+        addRate(tmpRates, fromCurrency, toCurrency, exchangeRate);
       }
+    } catch (SAXException | IOException | XPathExpressionException e) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while parsing currency configuration file "+currencyConfigFile, e);
     }
     // Atomically swap in the new rates map, if it loaded successfully
     this.rates = tmpRates;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3aa6086e/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java b/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java
new file mode 100644
index 0000000..6d8dbf7
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/util/SafeXMLParsing.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.util;
+
+import java.io.FilterReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.solr.common.EmptyEntityResolver;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SuppressForbidden;
+import org.apache.solr.common.util.XMLErrorLogger;
+import org.slf4j.Logger;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Some utility methods for parsing XML in a safe way. This class can be used to parse XML
+ * coming from network (completely untrusted) or it can load a config file from a
+ * {@link ResourceLoader}. In this case it allows external entities and xincludes, but only
+ * referring to files reachable by the loader.
+ */
+@SuppressForbidden(reason = "This class uses XML APIs directly that should not be used anywhere else in Solr code")
+public final class SafeXMLParsing  {
+  
+  public static final String SYSTEMID_UNTRUSTED = "untrusted://stream";
+
+  private SafeXMLParsing() {}
+  
+  /** Parses a config file from ResourceLoader. Xinclude and external entities are enabled, but cannot escape the resource loader. */
+  public static Document parseConfigXML(Logger log, ResourceLoader loader, String file) throws SAXException, IOException {
+    try (InputStream in = loader.openResource(file)) {
+      final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      dbf.setValidating(false);
+      dbf.setNamespaceAware(true);
+      trySetDOMFeature(dbf, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+      try {
+        dbf.setXIncludeAware(true);
+      } catch (UnsupportedOperationException e) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", e);
+      }
+      
+      final DocumentBuilder db = dbf.newDocumentBuilder();
+      db.setEntityResolver(new SystemIdResolver(loader));
+      db.setErrorHandler(new XMLErrorLogger(log));
+      return db.parse(in, SystemIdResolver.createSystemIdFromResourceName(file));
+    } catch (ParserConfigurationException pce) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser cannot be configured", pce);
+    }
+  }
+
+  /** Parses the given InputStream as XML, disabling any external entities with secure processing enabled.
+   * The given InputStream is not closed. */
+  public static Document parseUntrustedXML(Logger log, InputStream in) throws SAXException, IOException {
+    return getUntrustedDocumentBuilder(log).parse(new CloseShieldInputStream(in), SYSTEMID_UNTRUSTED);
+  }
+  
+  /** Parses the given InputStream as XML, disabling any external entities with secure processing enabled.
+   * The given Reader is not closed. */
+  public static Document parseUntrustedXML(Logger log, Reader reader) throws SAXException, IOException {
+    final InputSource is = new InputSource(new FilterReader(reader) {
+      @Override public void close() {}
+    });
+    is.setSystemId(SYSTEMID_UNTRUSTED);
+    return getUntrustedDocumentBuilder(log).parse(is);
+  }
+  
+  public static Document parseUntrustedXML(Logger log, String xml) throws SAXException, IOException {
+    return parseUntrustedXML(log, new StringReader(xml));
+  }
+
+  private static DocumentBuilder getUntrustedDocumentBuilder(Logger log) {
+    try {
+      final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      dbf.setValidating(false);
+      dbf.setNamespaceAware(true);
+      trySetDOMFeature(dbf, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+      
+      final DocumentBuilder db = dbf.newDocumentBuilder();
+      db.setEntityResolver(EmptyEntityResolver.SAX_INSTANCE);
+      db.setErrorHandler(new XMLErrorLogger(log));
+      return db;
+    } catch (ParserConfigurationException pce) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser cannot be configured", pce);
+    }
+  }
+  
+  private static void trySetDOMFeature(DocumentBuilderFactory factory, String feature, boolean enabled) {
+    try {
+      factory.setFeature(feature, enabled);
+    } catch (Exception ex) {
+      // ignore
+    }
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3aa6086e/solr/core/src/test/org/apache/solr/util/TestSafeXMLParsing.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/util/TestSafeXMLParsing.java b/solr/core/src/test/org/apache/solr/util/TestSafeXMLParsing.java
new file mode 100644
index 0000000..db71a8c
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/util/TestSafeXMLParsing.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.util.LuceneTestCase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+
+public class TestSafeXMLParsing extends LuceneTestCase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  
+  public void testUntrusted() throws Exception {
+    // TODO: Fix the underlying EmptyEntityResolver to not replace external entities by nothing and instead throw exception:
+    Document doc = SafeXMLParsing.parseUntrustedXML(log, "<!DOCTYPE test [\n" + 
+        "<!ENTITY internalTerm \"foobar\">\n" + 
+        "<!ENTITY externalTerm SYSTEM \"foo://bar.xyz/external\">\n" + 
+        "]>\n" + 
+        "<test>&internalTerm;&externalTerm;</test>");
+    assertEquals("foobar", doc.getDocumentElement().getTextContent());
+  }
+  
+  InputStream getStringStream(String xml) {
+    return new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8));
+  }
+  
+  public void testConfig() throws Exception {
+    final ResourceLoader loader = new ResourceLoader() {
+      @Override
+      public InputStream openResource(String resource) throws IOException {
+        switch (resource) {
+          case "source1.xml":
+            return getStringStream("<!DOCTYPE test [\n" + 
+                "<!ENTITY externalTerm SYSTEM \"foo://bar.xyz/external\">\n" + 
+                "]>\n" + 
+                "<test>&externalTerm;</test>");
+          case "source2.xml":
+            return getStringStream("<!DOCTYPE test [\n" + 
+                "<!ENTITY externalTerm SYSTEM \"./include1.xml\">\n" + 
+                "]>\n" + 
+                "<test>&externalTerm;</test>");
+          case "source3.xml":
+            return getStringStream("<foo xmlns:xi=\"http://www.w3.org/2001/XInclude\">\n" + 
+                "  <xi:include href=\"./include2.xml\"/>\n" + 
+                "</foo>");
+          case "include1.xml":
+            return getStringStream("Make XML Great Again!™");
+          case "include2.xml":
+            return getStringStream("<bar>Make XML Great Again!™</bar>");
+        }
+        throw new IOException("Resource not found: " + resource);
+      }
+
+      @Override
+      public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public <T> T newInstance(String cname, Class<T> expectedType) {
+        throw new UnsupportedOperationException();
+      }
+      
+    };
+    
+    IOException ioe = expectThrows(IOException.class, () -> {
+      SafeXMLParsing.parseConfigXML(log, loader, "source1.xml");
+    });
+    assertTrue(ioe.getMessage().contains("Cannot resolve absolute systemIDs"));
+    
+    Document doc = SafeXMLParsing.parseConfigXML(log, loader, "source2.xml");
+    assertEquals("Make XML Great Again!™", doc.getDocumentElement().getTextContent());
+    
+    doc = SafeXMLParsing.parseConfigXML(log, loader, "source3.xml");
+    assertEquals("Make XML Great Again!™", doc.getDocumentElement().getTextContent().trim());
+  }
+
+}


Mime
View raw message