Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 36FA3200C46 for ; Wed, 29 Mar 2017 17:15:07 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 3536A160B95; Wed, 29 Mar 2017 15:15:07 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 08619160B7C for ; Wed, 29 Mar 2017 17:15:05 +0200 (CEST) Received: (qmail 36281 invoked by uid 500); 29 Mar 2017 15:15:05 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 36272 invoked by uid 99); 29 Mar 2017 15:15:05 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 Mar 2017 15:15:05 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0B7B6DFBCA; Wed, 29 Mar 2017 15:15:05 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jeagles@apache.org To: common-commits@hadoop.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: HADOOP-14216. Improve Configuration XML Parsing Performance (jeagles) Date: Wed, 29 Mar 2017 15:15:05 +0000 (UTC) archived-at: Wed, 29 Mar 2017 15:15:07 -0000 Repository: hadoop Updated Branches: refs/heads/branch-2 82b4a9c3d -> 719ae5f2e HADOOP-14216. Improve Configuration XML Parsing Performance (jeagles) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/719ae5f2 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/719ae5f2 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/719ae5f2 Branch: refs/heads/branch-2 Commit: 719ae5f2e8a73528a7d3e2955a8163bbfd4a28d8 Parents: 82b4a9c Author: Jonathan Eagles Authored: Wed Mar 29 10:14:49 2017 -0500 Committer: Jonathan Eagles Committed: Wed Mar 29 10:14:49 2017 -0500 ---------------------------------------------------------------------- hadoop-common-project/hadoop-common/pom.xml | 10 + .../org/apache/hadoop/conf/Configuration.java | 301 ++++++++++++------- .../apache/hadoop/conf/TestConfiguration.java | 62 +++- hadoop-project/pom.xml | 10 + 4 files changed, 266 insertions(+), 117 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/719ae5f2/hadoop-common-project/hadoop-common/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index ee7acb3..8b39a7f 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -296,6 +296,16 @@ bcprov-jdk16 test + + org.codehaus.woodstox + stax2-api + compile + + + com.fasterxml + aalto-xml + compile + http://git-wip-us.apache.org/repos/asf/hadoop/blob/719ae5f2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 9301f51..fee84c2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -18,6 +18,7 @@ package org.apache.hadoop.conf; +import com.fasterxml.aalto.stax.InputFactoryImpl; import com.google.common.annotations.VisibleForTesting; import java.io.BufferedInputStream; @@ -63,9 +64,11 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; @@ -93,13 +96,10 @@ import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; import org.codehaus.jackson.JsonFactory; import org.codehaus.jackson.JsonGenerator; -import org.w3c.dom.DOMException; +import org.codehaus.stax2.XMLInputFactory2; +import org.codehaus.stax2.XMLStreamReader2; import org.w3c.dom.Document; import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.w3c.dom.Text; -import org.xml.sax.SAXException; import com.google.common.base.Preconditions; @@ -270,7 +270,13 @@ public class Configuration implements Iterable>, * the key most recently */ private Map updatingResource; - + + /** + * Specify exact input factory to avoid time finding correct one. + * Factory is reusable across un-synchronized threads once initialized + */ + private static final XMLInputFactory2 factory = new InputFactoryImpl(); + /** * Class to keep the information about the keys which replace the deprecated * ones. @@ -681,7 +687,7 @@ public class Configuration implements Iterable>, addDefaultResource("hadoop-site.xml"); } } - + private Properties properties; private Properties overlay; private ClassLoader classLoader; @@ -2534,8 +2540,8 @@ public class Configuration implements Iterable>, return configMap; } - private Document parse(DocumentBuilder builder, URL url) - throws IOException, SAXException { + private XMLStreamReader parse(URL url) + throws IOException, XMLStreamException { if (!quietmode) { if (LOG.isDebugEnabled()) { LOG.debug("parsing URL " + url); @@ -2551,23 +2557,18 @@ public class Configuration implements Iterable>, // with other users. connection.setUseCaches(false); } - return parse(builder, connection.getInputStream(), url.toString()); + return parse(connection.getInputStream(), url.toString()); } - private Document parse(DocumentBuilder builder, InputStream is, - String systemId) throws IOException, SAXException { + private XMLStreamReader parse(InputStream is, + String systemId) throws IOException, XMLStreamException { if (!quietmode) { LOG.debug("parsing input stream " + is); } if (is == null) { return null; } - try { - return (systemId == null) ? builder.parse(is) : builder.parse(is, - systemId); - } finally { - is.close(); - } + return factory.createXMLStreamReader(systemId, is); } private void loadResources(Properties properties, @@ -2587,37 +2588,20 @@ public class Configuration implements Iterable>, } } - private Resource loadResource(Properties properties, Resource wrapper, boolean quiet) { + private Resource loadResource(Properties properties, + Resource wrapper, boolean quiet) { String name = UNKNOWN_RESOURCE; try { Object resource = wrapper.getResource(); name = wrapper.getName(); - - DocumentBuilderFactory docBuilderFactory - = DocumentBuilderFactory.newInstance(); - //ignore all comments inside the xml file - docBuilderFactory.setIgnoringComments(true); - - //allow includes in the xml file - docBuilderFactory.setNamespaceAware(true); - try { - docBuilderFactory.setXIncludeAware(true); - } catch (UnsupportedOperationException e) { - LOG.error("Failed to set setXIncludeAware(true) for parser " - + docBuilderFactory - + ":" + e, - e); - } - DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); - Document doc = null; - Element root = null; + XMLStreamReader2 reader = null; boolean returnCachedProperties = false; - + if (resource instanceof URL) { // an URL resource - doc = parse(builder, (URL)resource); + reader = (XMLStreamReader2)parse((URL)resource); } else if (resource instanceof String) { // a CLASSPATH resource URL url = getResource((String)resource); - doc = parse(builder, url); + reader = (XMLStreamReader2)parse(url); } else if (resource instanceof Path) { // a file resource // Can't use FileSystem API or we get an infinite loop // since FileSystem uses Configuration API. Use java.io.File instead. @@ -2627,89 +2611,188 @@ public class Configuration implements Iterable>, if (!quiet) { LOG.debug("parsing File " + file); } - doc = parse(builder, new BufferedInputStream( + reader = (XMLStreamReader2)parse(new BufferedInputStream( new FileInputStream(file)), ((Path)resource).toString()); } } else if (resource instanceof InputStream) { - doc = parse(builder, (InputStream) resource, null); + reader = (XMLStreamReader2)parse((InputStream)resource, null); returnCachedProperties = true; } else if (resource instanceof Properties) { overlay(properties, (Properties)resource); - } else if (resource instanceof Element) { - root = (Element)resource; } - if (root == null) { - if (doc == null) { - if (quiet) { - return null; - } - throw new RuntimeException(resource + " not found"); + if (reader == null) { + if (quiet) { + return null; } - root = doc.getDocumentElement(); + throw new RuntimeException(resource + " not found"); } Properties toAddTo = properties; if(returnCachedProperties) { toAddTo = new Properties(); } - if (!"configuration".equals(root.getTagName())) - LOG.fatal("bad conf file: top-level element not "); - NodeList props = root.getChildNodes(); DeprecationContext deprecations = deprecationContext.get(); - for (int i = 0; i < props.getLength(); i++) { - Node propNode = props.item(i); - if (!(propNode instanceof Element)) - continue; - Element prop = (Element)propNode; - if ("configuration".equals(prop.getTagName())) { - loadResource(toAddTo, new Resource(prop, name), quiet); - continue; - } - if (!"property".equals(prop.getTagName())) - LOG.warn("bad conf file: element not "); - NodeList fields = prop.getChildNodes(); - String attr = null; - String value = null; - boolean finalParameter = false; - LinkedList source = new LinkedList(); - for (int j = 0; j < fields.getLength(); j++) { - Node fieldNode = fields.item(j); - if (!(fieldNode instanceof Element)) - continue; - Element field = (Element)fieldNode; - if ("name".equals(field.getTagName()) && field.hasChildNodes()) - attr = StringInterner.weakIntern( - ((Text)field.getFirstChild()).getData().trim()); - if ("value".equals(field.getTagName()) && field.hasChildNodes()) - value = StringInterner.weakIntern( - ((Text)field.getFirstChild()).getData()); - if ("final".equals(field.getTagName()) && field.hasChildNodes()) - finalParameter = "true".equals(((Text)field.getFirstChild()).getData()); - if ("source".equals(field.getTagName()) && field.hasChildNodes()) - source.add(StringInterner.weakIntern( - ((Text)field.getFirstChild()).getData())); - } - source.add(name); - - // Ignore this parameter if it has already been marked as 'final' - if (attr != null) { - if (deprecations.getDeprecatedKeyMap().containsKey(attr)) { - DeprecatedKeyInfo keyInfo = - deprecations.getDeprecatedKeyMap().get(attr); - keyInfo.clearAccessed(); - for (String key:keyInfo.newKeys) { - // update new keys with deprecated key's value - loadProperty(toAddTo, name, key, value, finalParameter, - source.toArray(new String[source.size()])); + + StringBuilder token = new StringBuilder(); + String confName = null; + String confValue = null; + boolean confFinal = false; + boolean fallbackAllowed = false; + boolean fallbackEntered = false; + boolean parseToken = false; + LinkedList confSource = new LinkedList(); + + while (reader.hasNext()) { + switch (reader.next()) { + case XMLStreamConstants.START_ELEMENT: + switch (reader.getLocalName()) { + case "property": + confName = null; + confValue = null; + confFinal = false; + confSource.clear(); + + // First test for short format configuration + int attrCount = reader.getAttributeCount(); + for (int i = 0; i < attrCount; i++) { + String propertyAttr = reader.getAttributeLocalName(i); + if ("name".equals(propertyAttr)) { + confName = StringInterner.weakIntern( + reader.getAttributeValue(i)); + } else if ("value".equals(propertyAttr)) { + confValue = StringInterner.weakIntern( + reader.getAttributeValue(i)); + } else if ("final".equals(propertyAttr)) { + confFinal = "true".equals(reader.getAttributeValue(i)); + } else if ("source".equals(propertyAttr)) { + confSource.add(StringInterner.weakIntern( + reader.getAttributeValue(i))); + } + } + break; + case "name": + case "value": + case "final": + case "source": + parseToken = true; + token.setLength(0); + break; + case "include": + if (!"xi".equals(reader.getPrefix())) { + break; + } + // Determine href for xi:include + String confInclude = null; + attrCount = reader.getAttributeCount(); + for (int i = 0; i < attrCount; i++) { + String attrName = reader.getAttributeLocalName(i); + if ("href".equals(attrName)) { + confInclude = reader.getAttributeValue(i); + } + } + if (confInclude == null) { + break; + } + // Determine if the included resource is a classpath resource + // otherwise fallback to a file resource + // xi:include are treated as inline and retain current source + URL include = getResource(confInclude); + if (include != null) { + Resource classpathResource = new Resource(include, name); + loadResource(properties, classpathResource, quiet); + } else { + File href = new File(confInclude); + if (!href.isAbsolute()) { + // Included resources are relative to the current resource + File baseFile = new File(name).getParentFile(); + href = new File(baseFile, href.getPath()); + } + if (!href.exists()) { + // Resource errors are non-fatal iff there is 1 xi:fallback + fallbackAllowed = true; + break; + } + Resource uriResource = new Resource(href.toURI().toURL(), name); + loadResource(properties, uriResource, quiet); } + break; + case "fallback": + if (!"xi".equals(reader.getPrefix())) { + break; + } + fallbackEntered = true; + break; + case "configuration": + break; + default: + break; } - else { - loadProperty(toAddTo, name, attr, value, finalParameter, - source.toArray(new String[source.size()])); + break; + + case XMLStreamConstants.CHARACTERS: + if (parseToken) { + char[] text = reader.getTextCharacters(); + token.append(text, reader.getTextStart(), reader.getTextLength()); } + break; + + case XMLStreamConstants.END_ELEMENT: + switch (reader.getLocalName()) { + case "name": + if (token.length() > 0) { + confName = StringInterner.weakIntern(token.toString().trim()); + } + break; + case "value": + if (token.length() > 0) { + confValue = StringInterner.weakIntern(token.toString()); + } + break; + case "final": + confFinal = "true".equals(token.toString()); + break; + case "source": + confSource.add(StringInterner.weakIntern(token.toString())); + break; + case "include": + if (!"xi".equals(reader.getPrefix())) { + break; + } + if (fallbackAllowed && !fallbackEntered) { + throw new IOException("Fetch fail on include with no " + + "fallback while loading '" + name + "'"); + } + fallbackAllowed = false; + fallbackEntered = false; + break; + case "property": + if (confName == null || (!fallbackAllowed && fallbackEntered)) { + break; + } + confSource.add(name); + DeprecatedKeyInfo keyInfo = + deprecations.getDeprecatedKeyMap().get(confName); + if (keyInfo != null) { + keyInfo.clearAccessed(); + for (String key : keyInfo.newKeys) { + // update new keys with deprecated key's value + loadProperty(toAddTo, name, key, confValue, confFinal, + confSource.toArray(new String[confSource.size()])); + } + } else { + loadProperty(toAddTo, name, confName, confValue, confFinal, + confSource.toArray(new String[confSource.size()])); + } + break; + default: + break; + } + default: + break; } } - + reader.close(); + if (returnCachedProperties) { overlay(properties, toAddTo); return new Resource(toAddTo, name); @@ -2718,15 +2801,9 @@ public class Configuration implements Iterable>, } catch (IOException e) { LOG.fatal("error parsing conf " + name, e); throw new RuntimeException(e); - } catch (DOMException e) { + } catch (XMLStreamException e) { LOG.fatal("error parsing conf " + name, e); throw new RuntimeException(e); - } catch (SAXException e) { - LOG.fatal("error parsing conf " + name, e); - throw new RuntimeException(e); - } catch (ParserConfigurationException e) { - LOG.fatal("error parsing conf " + name , e); - throw new RuntimeException(e); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/719ae5f2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 7c0a4a3..5d82002 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -99,10 +99,22 @@ public class TestConfiguration extends TestCase { out.close(); } - private void addInclude(String filename) throws IOException{ - out.write("\n "); + private void startInclude(String filename) throws IOException { + out.write("\n "); } - + + private void endInclude() throws IOException{ + out.write("\n "); + } + + private void startFallback() throws IOException { + out.write("\n "); + } + + private void endFallback() throws IOException { + out.write("\n "); + } + public void testInputStreamResource() throws Exception { StringWriter writer = new StringWriter(); out = new BufferedWriter(writer); @@ -419,7 +431,8 @@ public class TestConfiguration extends TestCase { out=new BufferedWriter(new FileWriter(CONFIG)); startConfig(); - addInclude(CONFIG2); + startInclude(CONFIG2); + endInclude(); appendProperty("e","f"); appendProperty("g","h"); endConfig(); @@ -434,6 +447,44 @@ public class TestConfiguration extends TestCase { tearDown(); } + public void testIncludesWithFallback() throws Exception { + tearDown(); + out=new BufferedWriter(new FileWriter(CONFIG2)); + startConfig(); + appendProperty("a","b"); + appendProperty("c","d"); + endConfig(); + + out=new BufferedWriter(new FileWriter(CONFIG)); + startConfig(); + startInclude(CONFIG2); + startFallback(); + appendProperty("a", "b.fallback"); + appendProperty("c", "d.fallback", true); + endFallback(); + endInclude(); + appendProperty("e","f"); + appendProperty("g","h"); + startInclude("MissingConfig.xml"); + startFallback(); + appendProperty("i", "j.fallback"); + appendProperty("k", "l.fallback", true); + endFallback(); + endInclude(); + endConfig(); + + // verify that the includes file contains all properties + Path fileResource = new Path(CONFIG); + conf.addResource(fileResource); + assertEquals("b", conf.get("a")); + assertEquals("d", conf.get("c")); + assertEquals("f", conf.get("e")); + assertEquals("h", conf.get("g")); + assertEquals("j.fallback", conf.get("i")); + assertEquals("l.fallback", conf.get("k")); + tearDown(); + } + public void testRelativeIncludes() throws Exception { tearDown(); String relConfig = new File("./tmp/test-config.xml").getAbsolutePath(); @@ -448,7 +499,8 @@ public class TestConfiguration extends TestCase { out = new BufferedWriter(new FileWriter(relConfig)); startConfig(); // Add the relative path instead of the absolute one. - addInclude(new File(relConfig2).getName()); + startInclude(new File(relConfig2).getName()); + endInclude(); appendProperty("c", "d"); endConfig(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/719ae5f2/hadoop-project/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index f21b789..831197c 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -758,6 +758,16 @@ 3.1.1 + org.codehaus.woodstox + stax2-api + 3.1.4 + + + com.fasterxml + aalto-xml + 1.0.0 + + org.codehaus.jackson jackson-mapper-asl ${jackson.version} --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org