Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id BE82417DFB for ; Mon, 6 Apr 2015 17:39:23 +0000 (UTC) Received: (qmail 73757 invoked by uid 500); 6 Apr 2015 17:38:19 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 73519 invoked by uid 500); 6 Apr 2015 17:38:18 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 72074 invoked by uid 99); 6 Apr 2015 17:38:15 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 06 Apr 2015 17:38:15 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A28F7E2F24; Mon, 6 Apr 2015 17:38:01 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zhz@apache.org To: common-commits@hadoop.apache.org Date: Mon, 06 Apr 2015 17:38:42 -0000 Message-Id: <3e584755d6ca4fe592d282534f2787c0@git.apache.org> In-Reply-To: <374e05a227944c769d59d36f99a37f26@git.apache.org> References: <374e05a227944c769d59d36f99a37f26@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [42/51] [abbrv] hadoop git commit: HADOOP-11664. Loading predefined EC schemas from configuration. Contributed by Kai Zheng. HADOOP-11664. Loading predefined EC schemas from configuration. Contributed by Kai Zheng. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a5097a48 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a5097a48 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a5097a48 Branch: refs/heads/HDFS-7285 Commit: a5097a481b0b8e1cc9f293882b2a5fa60075b6e1 Parents: d3ccfa9 Author: Zhe Zhang Authored: Fri Mar 27 14:52:50 2015 -0700 Committer: Zhe Zhang Committed: Mon Apr 6 10:21:02 2015 -0700 ---------------------------------------------------------------------- .../src/main/conf/ecschema-def.xml | 40 +++++ .../hadoop/fs/CommonConfigurationKeys.java | 5 + .../hadoop/io/erasurecode/SchemaLoader.java | 147 +++++++++++++++++++ .../hadoop/io/erasurecode/TestSchemaLoader.java | 80 ++++++++++ 4 files changed, 272 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/a5097a48/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml b/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml new file mode 100644 index 0000000..e619485 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml @@ -0,0 +1,40 @@ + + + + + + + + + 6 + 3 + RS + + + 10 + 4 + RS + + \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/a5097a48/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index 70fea01..af32674 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -141,6 +141,11 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { /** Supported erasure codec classes */ public static final String IO_ERASURECODE_CODECS_KEY = "io.erasurecode.codecs"; + public static final String IO_ERASURECODE_SCHEMA_FILE_KEY = + "io.erasurecode.schema.file"; + public static final String IO_ERASURECODE_SCHEMA_FILE_DEFAULT = + "ecschema-def.xml"; + /** Use XOR raw coder when possible for the RS codec */ public static final String IO_ERASURECODE_CODEC_RS_USEXOR_KEY = "io.erasurecode.codec.rs.usexor"; http://git-wip-us.apache.org/repos/asf/hadoop/blob/a5097a48/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java new file mode 100644 index 0000000..c51ed37 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.w3c.dom.*; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.*; + +/** + * A EC schema loading utility that loads predefined EC schemas from XML file + */ +public class SchemaLoader { + private static final Log LOG = LogFactory.getLog(SchemaLoader.class.getName()); + + /** + * Load predefined ec schemas from configuration file. This file is + * expected to be in the XML format. + */ + public List loadSchema(Configuration conf) { + File confFile = getSchemaFile(conf); + if (confFile == null) { + LOG.warn("Not found any predefined EC schema file"); + return Collections.emptyList(); + } + + try { + return loadSchema(confFile); + } catch (ParserConfigurationException e) { + throw new RuntimeException("Failed to load schema file: " + confFile); + } catch (IOException e) { + throw new RuntimeException("Failed to load schema file: " + confFile); + } catch (SAXException e) { + throw new RuntimeException("Failed to load schema file: " + confFile); + } + } + + private List loadSchema(File schemaFile) + throws ParserConfigurationException, IOException, SAXException { + + LOG.info("Loading predefined EC schema file " + schemaFile); + + // Read and parse the schema file. + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setIgnoringComments(true); + DocumentBuilder builder = dbf.newDocumentBuilder(); + Document doc = builder.parse(schemaFile); + Element root = doc.getDocumentElement(); + + if (!"schemas".equals(root.getTagName())) { + throw new RuntimeException("Bad EC schema config file: " + + "top-level element not "); + } + + NodeList elements = root.getChildNodes(); + List schemas = new ArrayList(); + for (int i = 0; i < elements.getLength(); i++) { + Node node = elements.item(i); + if (node instanceof Element) { + Element element = (Element) node; + if ("schema".equals(element.getTagName())) { + ECSchema schema = loadSchema(element); + schemas.add(schema); + } else { + LOG.warn("Bad element in EC schema configuration file: " + + element.getTagName()); + } + } + } + + return schemas; + } + + /** + * Path to the XML file containing predefined ec schemas. If the path is + * relative, it is searched for in the classpath. + */ + private File getSchemaFile(Configuration conf) { + String schemaFilePath = conf.get( + CommonConfigurationKeys.IO_ERASURECODE_SCHEMA_FILE_KEY, + CommonConfigurationKeys.IO_ERASURECODE_SCHEMA_FILE_DEFAULT); + File schemaFile = new File(schemaFilePath); + if (! schemaFile.isAbsolute()) { + URL url = Thread.currentThread().getContextClassLoader() + .getResource(schemaFilePath); + if (url == null) { + LOG.warn(schemaFilePath + " not found on the classpath."); + schemaFile = null; + } else if (! url.getProtocol().equalsIgnoreCase("file")) { + throw new RuntimeException( + "EC predefined schema file " + url + + " found on the classpath is not on the local filesystem."); + } else { + schemaFile = new File(url.getPath()); + } + } + + return schemaFile; + } + + /** + * Loads a schema from a schema element in the configuration file + */ + private ECSchema loadSchema(Element element) { + String schemaName = element.getAttribute("name"); + Map ecOptions = new HashMap(); + NodeList fields = element.getChildNodes(); + + for (int i = 0; i < fields.getLength(); i++) { + Node fieldNode = fields.item(i); + if (fieldNode instanceof Element) { + Element field = (Element) fieldNode; + String tagName = field.getTagName(); + String value = ((Text) field.getFirstChild()).getData().trim(); + ecOptions.put(tagName, value); + } + } + + ECSchema schema = new ECSchema(schemaName, ecOptions); + return schema; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/a5097a48/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java new file mode 100644 index 0000000..7bb0a9a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.junit.Test; + +import java.io.File; +import java.io.FileWriter; +import java.io.PrintWriter; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class TestSchemaLoader { + + final static String TEST_DIR = new File(System.getProperty( + "test.build.data", "/tmp")).getAbsolutePath(); + + final static String SCHEMA_FILE = new File(TEST_DIR, "test-ecschema") + .getAbsolutePath(); + + @Test + public void testLoadSchema() throws Exception { + PrintWriter out = new PrintWriter(new FileWriter(SCHEMA_FILE)); + out.println(""); + out.println(""); + out.println(" "); + out.println(" 6"); + out.println(" 3"); + out.println(" RS"); + out.println(" "); + out.println(" "); + out.println(" 10"); + out.println(" 4"); + out.println(" RS"); + out.println(" "); + out.println(""); + out.close(); + + Configuration conf = new Configuration(); + conf.set(CommonConfigurationKeys.IO_ERASURECODE_SCHEMA_FILE_KEY, + SCHEMA_FILE); + + SchemaLoader schemaLoader = new SchemaLoader(); + List schemas = schemaLoader.loadSchema(conf); + + assertEquals(2, schemas.size()); + + ECSchema schema1 = schemas.get(0); + assertEquals("RSk6m3", schema1.getSchemaName()); + assertEquals(3, schema1.getOptions().size()); + assertEquals(6, schema1.getNumDataUnits()); + assertEquals(3, schema1.getNumParityUnits()); + assertEquals("RS", schema1.getCodecName()); + + ECSchema schema2 = schemas.get(1); + assertEquals("RSk10m4", schema2.getSchemaName()); + assertEquals(3, schema2.getOptions().size()); + assertEquals(10, schema2.getNumDataUnits()); + assertEquals(4, schema2.getNumParityUnits()); + assertEquals("RS", schema2.getCodecName()); + } +} \ No newline at end of file