Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 09EE917668 for ; Mon, 13 Apr 2015 20:11:14 +0000 (UTC) Received: (qmail 54236 invoked by uid 500); 13 Apr 2015 20:10:39 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 54120 invoked by uid 500); 13 Apr 2015 20:10:39 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 52577 invoked by uid 99); 13 Apr 2015 20:10:38 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 13 Apr 2015 20:10:38 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 788D9E0A9A; Mon, 13 Apr 2015 20:10:38 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: zhz@apache.org To: common-commits@hadoop.apache.org Date: Mon, 13 Apr 2015 20:11:17 -0000 Message-Id: <16ea1e1a93ac4f9ab28941d52afa3c2c@git.apache.org> In-Reply-To: <7a4c2bbbee014f65a548d11a922e2c0b@git.apache.org> References: <7a4c2bbbee014f65a548d11a922e2c0b@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [41/50] [abbrv] hadoop git commit: HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code ( Contributed by Kai Zheng) HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code ( Contributed by Kai Zheng) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/38fa860f Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/38fa860f Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/38fa860f Branch: refs/heads/HDFS-7285 Commit: 38fa860f81143cc5ab8c77fb1c31c17135a0871f Parents: 4a94585 Author: Vinayakumar B Authored: Tue Apr 7 16:05:22 2015 +0530 Committer: Zhe Zhang Committed: Mon Apr 13 13:09:58 2015 -0700 ---------------------------------------------------------------------- .../hadoop-common/CHANGES-HDFS-EC-7285.txt | 3 + .../hadoop/io/erasurecode/ECBlockGroup.java | 18 ++++ .../erasurecode/codec/AbstractErasureCodec.java | 88 +++++++++++++++++++ .../io/erasurecode/codec/ErasureCodec.java | 56 ++++++++++++ .../io/erasurecode/codec/RSErasureCodec.java | 38 +++++++++ .../io/erasurecode/codec/XORErasureCodec.java | 45 ++++++++++ .../erasurecode/coder/AbstractErasureCoder.java | 7 ++ .../io/erasurecode/coder/ErasureCoder.java | 7 ++ .../io/erasurecode/grouper/BlockGrouper.java | 90 ++++++++++++++++++++ 9 files changed, 352 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt index 7716728..c72394e 100644 --- a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt +++ b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt @@ -37,3 +37,6 @@ HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng ( Kai Zheng ) + + HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code + ( Kai Zheng via vinayakumarb ) http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java index 2c851a5..0a86907 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java @@ -79,4 +79,22 @@ public class ECBlockGroup { return false; } + /** + * Get erased blocks count + * @return + */ + public int getErasedCount() { + int erasedCount = 0; + + for (ECBlock dataBlock : dataBlocks) { + if (dataBlock.isErased()) erasedCount++; + } + + for (ECBlock parityBlock : parityBlocks) { + if (parityBlock.isErased()) erasedCount++; + } + + return erasedCount; + } + } http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java new file mode 100644 index 0000000..9993786 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.coder.*; +import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper; + +/** + * Abstract Erasure Codec that implements {@link ErasureCodec}. + */ +public abstract class AbstractErasureCodec extends Configured + implements ErasureCodec { + + private ECSchema schema; + + @Override + public void setSchema(ECSchema schema) { + this.schema = schema; + } + + public String getName() { + return schema.getCodecName(); + } + + protected ECSchema getSchema() { + return schema; + } + + @Override + public BlockGrouper createBlockGrouper() { + BlockGrouper blockGrouper = new BlockGrouper(); + blockGrouper.setSchema(getSchema()); + + return blockGrouper; + } + + @Override + public ErasureCoder createEncoder() { + ErasureCoder encoder = doCreateEncoder(); + prepareErasureCoder(encoder); + return encoder; + } + + /** + * Create a new encoder instance to be initialized afterwards. + * @return encoder + */ + protected abstract ErasureCoder doCreateEncoder(); + + @Override + public ErasureCoder createDecoder() { + ErasureCoder decoder = doCreateDecoder(); + prepareErasureCoder(decoder); + return decoder; + } + + /** + * Create a new decoder instance to be initialized afterwards. + * @return decoder + */ + protected abstract ErasureCoder doCreateDecoder(); + + private void prepareErasureCoder(ErasureCoder erasureCoder) { + if (getSchema() == null) { + throw new RuntimeException("No schema been set yet"); + } + + erasureCoder.setConf(getConf()); + erasureCoder.initialize(getSchema()); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java new file mode 100644 index 0000000..e639484 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.coder.ErasureCoder; +import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper; + +/** + * Erasure Codec API that's to cover the essential specific aspects of a code. + * Currently it cares only block grouper and erasure coder. In future we may + * add more aspects here to make the behaviors customizable. + */ +public interface ErasureCodec extends Configurable { + + /** + * Set EC schema to be used by this codec. + * @param schema + */ + public void setSchema(ECSchema schema); + + /** + * Create block grouper + * @return block grouper + */ + public BlockGrouper createBlockGrouper(); + + /** + * Create Erasure Encoder + * @return erasure encoder + */ + public ErasureCoder createEncoder(); + + /** + * Create Erasure Decoder + * @return erasure decoder + */ + public ErasureCoder createDecoder(); + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java new file mode 100644 index 0000000..9e91b60 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.io.erasurecode.coder.ErasureCoder; +import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder; +import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder; + +/** + * A Reed-Solomon erasure codec. + */ +public class RSErasureCodec extends AbstractErasureCodec { + + @Override + protected ErasureCoder doCreateEncoder() { + return new RSErasureEncoder(); + } + + @Override + protected ErasureCoder doCreateDecoder() { + return new RSErasureDecoder(); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java new file mode 100644 index 0000000..0f726d7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.coder.ErasureCoder; +import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder; +import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder; + +/** + * A XOR erasure codec. + */ +public class XORErasureCodec extends AbstractErasureCodec { + + @Override + public void setSchema(ECSchema schema) { + super.setSchema(schema); + assert(schema.getNumParityUnits() == 1); + } + + @Override + protected ErasureCoder doCreateEncoder() { + return new XORErasureEncoder(); + } + + @Override + protected ErasureCoder doCreateDecoder() { + return new XORErasureDecoder(); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java index 0e4de89..e5bf11a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java @@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder; import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory; import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder; @@ -105,6 +106,12 @@ public abstract class AbstractErasureCoder } @Override + public void initialize(ECSchema schema) { + initialize(schema.getNumDataUnits(), schema.getNumParityUnits(), + schema.getChunkSize()); + } + + @Override public int getNumDataUnits() { return numDataUnits; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java index fb90156..64a82ea 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java @@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.io.erasurecode.ECBlockGroup; +import org.apache.hadoop.io.erasurecode.ECSchema; /** * An erasure coder to perform encoding or decoding given a group. Generally it @@ -45,6 +46,12 @@ public interface ErasureCoder extends Configurable { public void initialize(int numDataUnits, int numParityUnits, int chunkSize); /** + * Initialize with an EC schema. + * @param schema + */ + public void initialize(ECSchema schema); + + /** * The number of data input units for the coding. A unit can be a byte, * chunk or buffer or even a block. * @return count of data input units http://git-wip-us.apache.org/repos/asf/hadoop/blob/38fa860f/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java new file mode 100644 index 0000000..bdc1624 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/grouper/BlockGrouper.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.grouper; + +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECBlockGroup; +import org.apache.hadoop.io.erasurecode.ECSchema; + +/** + * As part of a codec, to handle how to form a block group for encoding + * and provide instructions on how to recover erased blocks from a block group + */ +public class BlockGrouper { + + private ECSchema schema; + + /** + * Set EC schema. + * @param schema + */ + public void setSchema(ECSchema schema) { + this.schema = schema; + } + + /** + * Get EC schema. + * @return + */ + protected ECSchema getSchema() { + return schema; + } + + /** + * Get required data blocks count in a BlockGroup. + * @return count of required data blocks + */ + public int getRequiredNumDataBlocks() { + return schema.getNumDataUnits(); + } + + /** + * Get required parity blocks count in a BlockGroup. + * @return count of required parity blocks + */ + public int getRequiredNumParityBlocks() { + return schema.getNumParityUnits(); + } + + /** + * Calculating and organizing BlockGroup, to be called by ECManager + * @param dataBlocks Data blocks to compute parity blocks against + * @param parityBlocks To be computed parity blocks + * @return + */ + public ECBlockGroup makeBlockGroup(ECBlock[] dataBlocks, + ECBlock[] parityBlocks) { + + ECBlockGroup blockGroup = new ECBlockGroup(dataBlocks, parityBlocks); + return blockGroup; + } + + /** + * Given a BlockGroup, tell if any of the missing blocks can be recovered, + * to be called by ECManager + * @param blockGroup a blockGroup that may contain erased blocks but not sure + * recoverable or not + * @return true if any erased block recoverable, false otherwise + */ + public boolean anyRecoverable(ECBlockGroup blockGroup) { + int erasedCount = blockGroup.getErasedCount(); + + return erasedCount > 0 && erasedCount <= getRequiredNumParityBlocks(); + } + +}