hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From z..@apache.org
Subject [21/21] hadoop git commit: HDFS-7285. Erasure Coding Support inside HDFS.
Date Tue, 11 Aug 2015 17:45:11 GMT
HDFS-7285. Erasure Coding Support inside HDFS.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/dc0a6173
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/dc0a6173
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/dc0a6173

Branch: refs/heads/HDFS-7285-merge
Commit: dc0a6173fff4f782cee10a5268db9d2622d47ce4
Parents: fa1d84a
Author: Zhe Zhang <zhezhang@cloudera.com>
Authored: Thu Aug 6 23:24:03 2015 -0700
Committer: Zhe Zhang <zhezhang@cloudera.com>
Committed: Tue Aug 11 10:43:29 2015 -0700

----------------------------------------------------------------------
 .../hadoop-common/CHANGES-HDFS-EC-7285.txt      |   74 +
 .../hadoop/fs/CommonConfigurationKeys.java      |   15 +
 .../org/apache/hadoop/fs/FSOutputSummer.java    |    4 +
 .../main/java/org/apache/hadoop/fs/FsShell.java |    8 +-
 .../apache/hadoop/io/erasurecode/CodecUtil.java |  144 ++
 .../apache/hadoop/io/erasurecode/ECBlock.java   |   80 ++
 .../hadoop/io/erasurecode/ECBlockGroup.java     |  100 ++
 .../apache/hadoop/io/erasurecode/ECChunk.java   |   87 ++
 .../apache/hadoop/io/erasurecode/ECSchema.java  |  257 ++++
 .../hadoop/io/erasurecode/SchemaLoader.java     |  152 +++
 .../erasurecode/codec/AbstractErasureCodec.java |   51 +
 .../io/erasurecode/codec/ErasureCodec.java      |   49 +
 .../io/erasurecode/codec/RSErasureCodec.java    |   43 +
 .../io/erasurecode/codec/XORErasureCodec.java   |   44 +
 .../erasurecode/coder/AbstractErasureCoder.java |   62 +
 .../coder/AbstractErasureCodingStep.java        |   59 +
 .../coder/AbstractErasureDecoder.java           |  167 +++
 .../coder/AbstractErasureEncoder.java           |   60 +
 .../io/erasurecode/coder/ErasureCoder.java      |   77 ++
 .../io/erasurecode/coder/ErasureCodingStep.java |   55 +
 .../erasurecode/coder/ErasureDecodingStep.java  |   52 +
 .../erasurecode/coder/ErasureEncodingStep.java  |   49 +
 .../io/erasurecode/coder/RSErasureDecoder.java  |   67 +
 .../io/erasurecode/coder/RSErasureEncoder.java  |   67 +
 .../io/erasurecode/coder/XORErasureDecoder.java |   86 ++
 .../io/erasurecode/coder/XORErasureEncoder.java |   53 +
 .../io/erasurecode/grouper/BlockGrouper.java    |   90 ++
 .../rawcoder/AbstractRawErasureCoder.java       |  138 ++
 .../rawcoder/AbstractRawErasureDecoder.java     |  207 +++
 .../rawcoder/AbstractRawErasureEncoder.java     |  136 ++
 .../io/erasurecode/rawcoder/RSRawDecoder.java   |  216 +++
 .../io/erasurecode/rawcoder/RSRawEncoder.java   |   79 ++
 .../rawcoder/RSRawErasureCoderFactory.java      |   34 +
 .../erasurecode/rawcoder/RawErasureCoder.java   |   66 +
 .../rawcoder/RawErasureCoderFactory.java        |   42 +
 .../erasurecode/rawcoder/RawErasureDecoder.java |   88 ++
 .../erasurecode/rawcoder/RawErasureEncoder.java |   64 +
 .../io/erasurecode/rawcoder/XORRawDecoder.java  |   83 ++
 .../io/erasurecode/rawcoder/XORRawEncoder.java  |   77 ++
 .../rawcoder/XORRawErasureCoderFactory.java     |   34 +
 .../io/erasurecode/rawcoder/util/DumpUtil.java  |   85 ++
 .../erasurecode/rawcoder/util/GaloisField.java  |  561 ++++++++
 .../io/erasurecode/rawcoder/util/RSUtil.java    |   39 +
 .../hadoop/io/erasurecode/BufferAllocator.java  |   91 ++
 .../hadoop/io/erasurecode/TestCoderBase.java    |  500 +++++++
 .../hadoop/io/erasurecode/TestECSchema.java     |   51 +
 .../hadoop/io/erasurecode/TestSchemaLoader.java |   74 +
 .../erasurecode/coder/TestErasureCoderBase.java |  297 ++++
 .../erasurecode/coder/TestRSErasureCoder.java   |  126 ++
 .../io/erasurecode/coder/TestXORCoder.java      |   64 +
 .../io/erasurecode/rawcoder/TestRSRawCoder.java |  118 ++
 .../rawcoder/TestRSRawCoderBase.java            |   58 +
 .../erasurecode/rawcoder/TestRawCoderBase.java  |  232 ++++
 .../erasurecode/rawcoder/TestXORRawCoder.java   |   66 +
 hadoop-hdfs-project/hadoop-hdfs-client/pom.xml  |    1 +
 .../hdfs/client/HdfsClientConfigKeys.java       |   12 +
 .../hadoop/hdfs/protocol/ClientProtocol.java    |   27 +
 .../hadoop/hdfs/protocol/ErasureCodingZone.java |   66 +
 .../hadoop/hdfs/protocol/HdfsConstants.java     |   11 +
 .../hadoop/hdfs/protocol/HdfsFileStatus.java    |   16 +-
 .../hadoop/hdfs/protocol/LocatedBlock.java      |    8 +-
 .../hadoop/hdfs/protocol/LocatedBlocks.java     |   26 +-
 .../hdfs/protocol/LocatedStripedBlock.java      |   86 ++
 .../protocol/SnapshottableDirectoryStatus.java  |    2 +-
 .../apache/hadoop/hdfs/web/JsonUtilClient.java  |    4 +-
 .../src/main/proto/ClientNamenodeProtocol.proto |    7 +
 .../src/main/proto/erasurecoding.proto          |   71 +
 .../src/main/proto/hdfs.proto                   |   36 +-
 .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt        |  396 ++++++
 .../hadoop-hdfs/src/main/bin/hdfs               |    6 +
 .../org/apache/hadoop/hdfs/BlockReader.java     |   10 +-
 .../apache/hadoop/hdfs/BlockReaderLocal.java    |    5 +
 .../hadoop/hdfs/BlockReaderLocalLegacy.java     |    5 +
 .../java/org/apache/hadoop/hdfs/DFSClient.java  |  109 +-
 .../org/apache/hadoop/hdfs/DFSConfigKeys.java   |   19 +-
 .../org/apache/hadoop/hdfs/DFSInputStream.java  |   66 +-
 .../org/apache/hadoop/hdfs/DFSOutputStream.java |   23 +-
 .../java/org/apache/hadoop/hdfs/DFSPacket.java  |   34 +-
 .../hadoop/hdfs/DFSStripedInputStream.java      |  939 +++++++++++++
 .../hadoop/hdfs/DFSStripedOutputStream.java     |  653 +++++++++
 .../java/org/apache/hadoop/hdfs/DFSUtil.java    |    9 +-
 .../org/apache/hadoop/hdfs/DataStreamer.java    |   72 +-
 .../hadoop/hdfs/DistributedFileSystem.java      |   66 +
 .../apache/hadoop/hdfs/RemoteBlockReader.java   |    5 +
 .../apache/hadoop/hdfs/RemoteBlockReader2.java  |    5 +
 .../apache/hadoop/hdfs/StripedDataStreamer.java |  240 ++++
 .../apache/hadoop/hdfs/client/HdfsAdmin.java    |   40 +
 .../hadoop/hdfs/client/impl/DfsClientConf.java  |   22 +-
 .../hdfs/protocol/HdfsLocatedFileStatus.java    |    6 +-
 ...tNamenodeProtocolServerSideTranslatorPB.java |   68 +-
 .../ClientNamenodeProtocolTranslatorPB.java     |   79 +-
 .../DatanodeProtocolClientSideTranslatorPB.java |    2 +-
 .../DatanodeProtocolServerSideTranslatorPB.java |    2 +-
 .../apache/hadoop/hdfs/protocolPB/PBHelper.java |  326 ++++-
 .../hadoop/hdfs/server/balancer/Balancer.java   |    6 +-
 .../hadoop/hdfs/server/balancer/Dispatcher.java |  150 +-
 .../server/blockmanagement/BlockCollection.java |   17 +-
 .../server/blockmanagement/BlockIdManager.java  |   58 +-
 .../hdfs/server/blockmanagement/BlockInfo.java  |   73 +-
 .../blockmanagement/BlockInfoContiguous.java    |   45 +-
 .../BlockInfoContiguousUnderConstruction.java   |  189 +--
 .../blockmanagement/BlockInfoStriped.java       |  286 ++++
 .../BlockInfoStripedUnderConstruction.java      |  298 ++++
 .../BlockInfoUnderConstruction.java             |   84 ++
 .../server/blockmanagement/BlockManager.java    | 1281 ++++++++++++------
 .../blockmanagement/BlockPlacementPolicies.java |   54 +
 .../blockmanagement/BlockPlacementPolicy.java   |   26 +-
 .../BlockPlacementPolicyRackFaultTolarent.java  |  154 +++
 .../hdfs/server/blockmanagement/BlocksMap.java  |   25 +-
 .../blockmanagement/DatanodeDescriptor.java     |   62 +-
 .../server/blockmanagement/DatanodeManager.java |   24 +-
 .../blockmanagement/DatanodeStorageInfo.java    |   17 +-
 .../blockmanagement/DecommissionManager.java    |   83 +-
 .../ReplicaUnderConstruction.java               |  119 ++
 .../SequentialBlockGroupIdGenerator.java        |   85 ++
 .../SequentialBlockIdGenerator.java             |    6 +-
 .../blockmanagement/UnderReplicatedBlocks.java  |   51 +-
 .../hdfs/server/common/HdfsServerConstants.java |    5 +
 .../hdfs/server/datanode/BPOfferService.java    |    8 +
 .../hadoop/hdfs/server/datanode/DNConf.java     |   27 +
 .../hadoop/hdfs/server/datanode/DataNode.java   |   58 +-
 .../erasurecode/ErasureCodingWorker.java        |  988 ++++++++++++++
 .../apache/hadoop/hdfs/server/mover/Mover.java  |   36 +-
 .../namenode/ErasureCodingSchemaManager.java    |  127 ++
 .../namenode/ErasureCodingZoneManager.java      |  170 +++
 .../hdfs/server/namenode/FSDirAppendOp.java     |    5 +
 .../hdfs/server/namenode/FSDirAttrOp.java       |    7 +-
 .../hdfs/server/namenode/FSDirConcatOp.java     |    6 +
 .../server/namenode/FSDirErasureCodingOp.java   |  217 +++
 .../hdfs/server/namenode/FSDirRenameOp.java     |    2 +
 .../server/namenode/FSDirStatAndListingOp.java  |   27 +-
 .../hdfs/server/namenode/FSDirTruncateOp.java   |   16 +-
 .../hdfs/server/namenode/FSDirWriteFileOp.java  |  170 ++-
 .../hdfs/server/namenode/FSDirectory.java       |    6 +-
 .../hdfs/server/namenode/FSEditLogLoader.java   |  101 +-
 .../hdfs/server/namenode/FSImageFormat.java     |   22 +-
 .../server/namenode/FSImageFormatPBINode.java   |   57 +-
 .../server/namenode/FSImageFormatProtobuf.java  |    9 +-
 .../server/namenode/FSImageSerialization.java   |   13 +-
 .../hdfs/server/namenode/FSNamesystem.java      |  304 ++++-
 .../namenode/FileUnderConstructionFeature.java  |   10 +-
 .../hadoop/hdfs/server/namenode/INodeFile.java  |  152 ++-
 .../server/namenode/INodeFileAttributes.java    |   12 +-
 .../hdfs/server/namenode/LeaseManager.java      |    2 +-
 .../server/namenode/NameNodeLayoutVersion.java  |    3 +-
 .../hdfs/server/namenode/NameNodeRpcServer.java |   34 +-
 .../hdfs/server/namenode/NamenodeFsck.java      |  227 +++-
 .../hadoop/hdfs/server/namenode/Namesystem.java |   15 +-
 .../hadoop/hdfs/server/namenode/SafeMode.java   |    5 +-
 .../snapshot/FSImageFormatPBSnapshot.java       |   14 +-
 .../server/namenode/snapshot/FileDiffList.java  |    5 +-
 .../server/protocol/BlockECRecoveryCommand.java |  153 +++
 .../server/protocol/BlocksWithLocations.java    |   25 +
 .../hdfs/server/protocol/DatanodeProtocol.java  |    1 +
 .../hadoop/hdfs/tools/erasurecode/ECCli.java    |   48 +
 .../hdfs/tools/erasurecode/ECCommand.java       |  226 +++
 .../hadoop/hdfs/util/StripedBlockUtil.java      |  947 +++++++++++++
 .../src/main/proto/DatanodeProtocol.proto       |   10 +
 .../hadoop-hdfs/src/main/proto/fsimage.proto    |    3 +
 .../src/main/resources/hdfs-default.xml         |   31 +-
 .../hadoop/cli/CLITestCmdErasureCoding.java     |   38 +
 .../apache/hadoop/cli/TestErasureCodingCLI.java |  114 ++
 .../cli/util/CLICommandErasureCodingCli.java    |   21 +
 .../cli/util/ErasureCodingCliCmdExecutor.java   |   37 +
 .../apache/hadoop/hdfs/BlockReaderTestUtil.java |    7 +-
 .../org/apache/hadoop/hdfs/DFSTestUtil.java     |  182 ++-
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  |    2 -
 .../apache/hadoop/hdfs/StripedFileTestUtil.java |  261 ++++
 .../hadoop/hdfs/TestBlockReaderFactory.java     |   16 +-
 .../hadoop/hdfs/TestDFSClientRetries.java       |    6 +-
 .../hadoop/hdfs/TestDFSStripedInputStream.java  |  335 +++++
 .../hadoop/hdfs/TestDFSStripedOutputStream.java |  293 ++++
 .../TestDFSStripedOutputStreamWithFailure.java  |  329 +++++
 .../org/apache/hadoop/hdfs/TestDFSUtil.java     |    2 +-
 .../org/apache/hadoop/hdfs/TestECSchemas.java   |   54 +
 .../apache/hadoop/hdfs/TestEncryptionZones.java |    2 +-
 .../hadoop/hdfs/TestErasureCodingZones.java     |  223 +++
 .../hadoop/hdfs/TestFileStatusWithECschema.java |   65 +
 .../java/org/apache/hadoop/hdfs/TestLease.java  |    4 +-
 .../hdfs/TestReadStripedFileWithDecoding.java   |  358 +++++
 .../TestReadStripedFileWithMissingBlocks.java   |  150 ++
 .../hadoop/hdfs/TestRecoverStripedFile.java     |  400 ++++++
 .../org/apache/hadoop/hdfs/TestSafeMode.java    |    4 +-
 .../hdfs/TestSafeModeWithStripedFile.java       |  150 ++
 .../hadoop/hdfs/TestWriteReadStripedFile.java   |  251 ++++
 .../hdfs/TestWriteStripedFileWithFailure.java   |  162 +++
 .../hadoop/hdfs/protocol/TestLayoutVersion.java |    3 +-
 .../hadoop/hdfs/protocolPB/TestPBHelper.java    |  170 ++-
 .../hdfs/server/balancer/TestBalancer.java      |   87 ++
 .../blockmanagement/BlockManagerTestUtil.java   |   10 +-
 .../server/blockmanagement/TestBlockInfo.java   |    2 +-
 .../blockmanagement/TestBlockInfoStriped.java   |  250 ++++
 .../TestBlockInfoUnderConstruction.java         |    3 +-
 .../blockmanagement/TestBlockManager.java       |   63 +-
 .../blockmanagement/TestBlockTokenWithDFS.java  |  422 +++---
 .../TestBlockTokenWithDFSStriped.java           |  115 ++
 .../blockmanagement/TestHeartbeatHandling.java  |    1 -
 .../TestNameNodePrunesMissingStorages.java      |    5 +-
 .../server/blockmanagement/TestNodeCount.java   |    2 +-
 .../TestOverReplicatedBlocks.java               |    4 +-
 .../blockmanagement/TestReplicationPolicy.java  |   25 +-
 .../TestSequentialBlockGroupId.java             |  222 +++
 .../TestUnderReplicatedBlockQueues.java         |   62 +
 .../server/datanode/SimulatedFSDataset.java     |    2 +-
 .../datanode/TestIncrementalBrVariations.java   |   14 +-
 .../hadoop/hdfs/server/mover/TestMover.java     |  125 +-
 .../hdfs/server/namenode/NameNodeAdapter.java   |    9 +-
 .../TestAddOverReplicatedStripedBlocks.java     |  266 ++++
 .../server/namenode/TestAddStripedBlocks.java   |  433 ++++++
 .../hdfs/server/namenode/TestDeadDatanode.java  |    3 +-
 .../server/namenode/TestFSEditLogLoader.java    |  273 ++++
 .../hdfs/server/namenode/TestFSImage.java       |  345 ++++-
 .../hdfs/server/namenode/TestFileTruncate.java  |    4 +-
 .../hadoop/hdfs/server/namenode/TestFsck.java   |   83 +-
 .../hdfs/server/namenode/TestINodeFile.java     |    2 +-
 .../namenode/TestQuotaWithStripedBlocks.java    |  125 ++
 .../namenode/TestRecoverStripedBlocks.java      |  169 +++
 .../server/namenode/TestStripedINodeFile.java   |  284 ++++
 .../namenode/snapshot/SnapshotTestHelper.java   |    2 +-
 ...TestOfflineImageViewerWithStripedBlocks.java |  162 +++
 .../hadoop/hdfs/util/TestStripedBlockUtil.java  |  279 ++++
 .../apache/hadoop/hdfs/web/TestJsonUtil.java    |    2 +-
 .../test/resources/testErasureCodingConf.xml    |  377 ++++++
 223 files changed, 23039 insertions(+), 1587 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
new file mode 100644
index 0000000..1f3006e
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt
@@ -0,0 +1,74 @@
+  BREAKDOWN OF HADOOP-11264 SUBTASKS AND RELATED JIRAS (Common part of HDFS-7285)
+
+    HADOOP-11514. Raw Erasure Coder API for concrete encoding and decoding
+    (Kai Zheng via umamahesh)
+
+    HADOOP-11534. Minor improvements for raw erasure coders
+    ( Kai Zheng via vinayakumarb )
+
+    HADOOP-11541. Raw XOR coder
+    ( Kai Zheng )
+
+    HADOOP-11542. Raw Reed-Solomon coder in pure Java. Contributed by Kai Zheng
+    ( Kai Zheng )
+
+    HADOOP-11643. Define EC schema API for ErasureCodec. Contributed by Kai Zheng
+    ( Kai Zheng )
+
+    HADOOP-11646. Erasure Coder API for encoding and decoding of block group
+    ( Kai Zheng via vinayakumarb )
+
+    HADOOP-11705. Make erasure coder configurable. Contributed by Kai Zheng
+    ( Kai Zheng )
+
+    HADOOP-11706. Refine a little bit erasure coder API. Contributed by Kai Zheng
+    ( Kai Zheng )
+
+    HADOOP-11707. Add factory to create raw erasure coder. Contributed by Kai Zheng
+    ( Kai Zheng )
+
+    HADOOP-11647. Reed-Solomon ErasureCoder. Contributed by Kai Zheng
+    ( Kai Zheng )
+
+    HADOOP-11782 Correct two thrown messages in ECSchema class. Contributed by Xinwei Qin
+    ( Xinwei Qin via Kai Zheng )
+
+    HADOOP-11740. Combine erasure encoder and decoder interfaces (Zhe Zhang)
+
+    HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng
+    ( Kai Zheng )
+
+    HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code
+    ( Kai Zheng via vinayakumarb )
+  
+    HADOOP-11818. Minor improvements for erasurecode classes. (Rakesh R via Kai Zheng)
+
+    HADOOP-11841. Remove unused ecschema-def.xml files.  (szetszwo)
+
+    HADOOP-11921. Enhance tests for erasure coders. (Kai Zheng via Zhe Zhang)
+
+    HADOOP-11920. Refactor some codes for erasure coders. (Kai Zheng via Zhe Zhang)
+
+    HADOOP-11566. Add tests and fix for erasure coders to recover erased parity 
+    units. (Kai Zheng via Zhe Zhang)
+
+    HADOOP-11938. Enhance ByteBuffer version encode/decode API of raw erasure 
+    coder. (Kai Zheng via Zhe Zhang)
+
+    HADOOP-12013. Generate fixed data to perform erasure coder test. (Kai Zheng)
+
+    HADOOP-12029. Remove chunkSize from ECSchema as its not required for coders
+    (vinayakumarb)
+
+    HADOOP-11847. Enhance raw coder allowing to read least required inputs in decoding.
+    (Kai Zheng)
+
+    HADOOP-12011. Allow to dump verbose information to ease debugging in raw erasure coders
+    (Kai Zheng)
+
+    HADOOP-12065. Using more meaningful keys in EC schema. (Kai Zheng)
+
+    HDFS-8557. Allow to configure RS and XOR raw coders (Kai Zheng)
+
+    HADOOP-12060. Fix ByteBuffer usage for raw erasure coders. (Kai Zheng via
+    jing9)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
index 2721466..9588254 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
@@ -137,6 +137,21 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
       false;
 
   /**
+   * Erasure Coding configuration family
+   */
+
+  /** Supported erasure codec classes */
+  public static final String IO_ERASURECODE_CODECS_KEY = "io.erasurecode.codecs";
+
+  /** Raw coder factory for the RS codec. */
+  public static final String IO_ERASURECODE_CODEC_RS_RAWCODER_KEY =
+      "io.erasurecode.codec.rs.rawcoder";
+
+  /** Raw coder factory for the XOR codec. */
+  public static final String IO_ERASURECODE_CODEC_XOR_RAWCODER_KEY =
+      "io.erasurecode.codec.xor.rawcoder";
+
+  /**
    * Service Authorization
    */
   public static final String 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java
index bdc5585..a8a7494 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java
@@ -196,6 +196,10 @@ abstract public class FSOutputSummer extends OutputStream {
     return sum.getChecksumSize();
   }
 
+  protected DataChecksum getDataChecksum() {
+    return sum;
+  }
+
   protected TraceScope createWriteTraceScope() {
     return NullScope.INSTANCE;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java
index a0510be..bee7c8c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java
@@ -124,6 +124,10 @@ public class FsShell extends Configured implements Tool {
     return getTrash().getCurrentTrashDir();
   }
 
+  protected String getUsagePrefix() {
+    return usagePrefix;
+  }
+
   // NOTE: Usage/Help are inner classes to allow access to outer methods
   // that access commandFactory
   
@@ -207,7 +211,7 @@ public class FsShell extends Configured implements Tool {
       }
     } else {
       // display help or usage for all commands 
-      out.println(usagePrefix);
+      out.println(getUsagePrefix());
       
       // display list of short usages
       ArrayList<Command> instances = new ArrayList<Command>();
@@ -231,7 +235,7 @@ public class FsShell extends Configured implements Tool {
   }
 
   private void printInstanceUsage(PrintStream out, Command instance) {
-    out.println(usagePrefix + " " + instance.getUsage());
+    out.println(getUsagePrefix() + " " + instance.getUsage());
   }
 
   private void printInstanceHelp(PrintStream out, Command instance) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java
new file mode 100644
index 0000000..027d58b
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.io.erasurecode.rawcoder.*;
+
+/**
+ * A codec & coder utility to help create raw coders conveniently.
+ */
+public final class CodecUtil {
+
+  private CodecUtil() { }
+
+  /**
+   * Create RS raw encoder according to configuration.
+   * @param conf configuration possibly with some items to configure the coder
+   * @param numDataUnits number of data units in a coding group
+   * @param numParityUnits number of parity units in a coding group
+   * @return raw encoder
+   */
+  public static RawErasureEncoder createRSRawEncoder(
+      Configuration conf, int numDataUnits, int numParityUnits) {
+    RawErasureCoder rawCoder = createRawCoder(conf,
+        CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY,
+        true, numDataUnits, numParityUnits);
+    if (rawCoder == null) {
+      rawCoder = new RSRawEncoder(numDataUnits, numParityUnits);
+    }
+
+    return (RawErasureEncoder) rawCoder;
+  }
+
+  /**
+   * Create RS raw decoder according to configuration.
+   * @param conf configuration possibly with some items to configure the coder
+   * @param numDataUnits number of data units in a coding group
+   * @param numParityUnits number of parity units in a coding group
+   * @return raw decoder
+   */
+  public static RawErasureDecoder createRSRawDecoder(
+      Configuration conf, int numDataUnits, int numParityUnits) {
+    RawErasureCoder rawCoder = createRawCoder(conf,
+        CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY,
+        false, numDataUnits, numParityUnits);
+    if (rawCoder == null) {
+      rawCoder = new RSRawDecoder(numDataUnits, numParityUnits);
+    }
+
+    return (RawErasureDecoder) rawCoder;
+  }
+
+  /**
+   * Create XOR raw encoder according to configuration.
+   * @param conf configuration possibly with some items to configure the coder
+   * @param numDataUnits number of data units in a coding group
+   * @param numParityUnits number of parity units in a coding group
+   * @return raw encoder
+   */
+  public static RawErasureEncoder createXORRawEncoder(
+      Configuration conf, int numDataUnits, int numParityUnits) {
+    RawErasureCoder rawCoder = createRawCoder(conf,
+        CommonConfigurationKeys.IO_ERASURECODE_CODEC_XOR_RAWCODER_KEY,
+        true, numDataUnits, numParityUnits);
+    if (rawCoder == null) {
+      rawCoder = new XORRawEncoder(numDataUnits, numParityUnits);
+    }
+
+    return (RawErasureEncoder) rawCoder;
+  }
+
+  /**
+   * Create XOR raw decoder according to configuration.
+   * @param conf configuration possibly with some items to configure the coder
+   * @param numDataUnits number of data units in a coding group
+   * @param numParityUnits number of parity units in a coding group
+   * @return raw decoder
+   */
+  public static RawErasureDecoder createXORRawDecoder(
+      Configuration conf, int numDataUnits, int numParityUnits) {
+    RawErasureCoder rawCoder = createRawCoder(conf,
+        CommonConfigurationKeys.IO_ERASURECODE_CODEC_XOR_RAWCODER_KEY,
+        false, numDataUnits, numParityUnits);
+    if (rawCoder == null) {
+      rawCoder = new XORRawDecoder(numDataUnits, numParityUnits);
+    }
+
+    return (RawErasureDecoder) rawCoder;
+  }
+
+  /**
+   * Create raw coder using specified conf and raw coder factory key.
+   * @param conf configuration possibly with some items to configure the coder
+   * @param rawCoderFactoryKey configuration key to find the raw coder factory
+   * @param isEncoder is encoder or not we're going to create
+   * @param numDataUnits number of data units in a coding group
+   * @param numParityUnits number of parity units in a coding group
+   * @return raw coder
+   */
+  public static RawErasureCoder createRawCoder(Configuration conf,
+      String rawCoderFactoryKey, boolean isEncoder, int numDataUnits,
+                                               int numParityUnits) {
+
+    if (conf == null) {
+      return null;
+    }
+
+    Class<? extends RawErasureCoderFactory> factClass = null;
+    factClass = conf.getClass(rawCoderFactoryKey,
+        factClass, RawErasureCoderFactory.class);
+
+    if (factClass == null) {
+      return null;
+    }
+
+    RawErasureCoderFactory fact;
+    try {
+      fact = factClass.newInstance();
+    } catch (InstantiationException e) {
+      throw new RuntimeException("Failed to create raw coder", e);
+    } catch (IllegalAccessException e) {
+      throw new RuntimeException("Failed to create raw coder", e);
+    }
+
+    return isEncoder ? fact.createEncoder(numDataUnits, numParityUnits) :
+            fact.createDecoder(numDataUnits, numParityUnits);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java
new file mode 100644
index 0000000..5c0a160
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+/**
+ * A wrapper of block level data source/output that {@link ECChunk}s can be
+ * extracted from. For HDFS, it can be an HDFS block (250MB). Note it only cares
+ * about erasure coding specific logic thus avoids coupling with any HDFS block
+ * details. We can have something like HdfsBlock extend it.
+ */
+public class ECBlock {
+
+  private boolean isParity;
+  private boolean isErased;
+
+  /**
+   * A default constructor. isParity and isErased are false by default.
+   */
+  public ECBlock() {
+    this(false, false);
+  }
+
+  /**
+   * A constructor specifying isParity and isErased.
+   * @param isParity is a parity block
+   * @param isErased is erased or not
+   */
+  public ECBlock(boolean isParity, boolean isErased) {
+    this.isParity = isParity;
+    this.isErased = isErased;
+  }
+
+  /**
+   * Set true if it's for a parity block.
+   * @param isParity is parity or not
+   */
+  public void setParity(boolean isParity) {
+    this.isParity = isParity;
+  }
+
+  /**
+   * Set true if the block is missing.
+   * @param isErased is erased or not
+   */
+  public void setErased(boolean isErased) {
+    this.isErased = isErased;
+  }
+
+  /**
+   *
+   * @return true if it's parity block, otherwise false
+   */
+  public boolean isParity() {
+    return isParity;
+  }
+
+  /**
+   *
+   * @return true if it's erased due to erasure, otherwise false
+   */
+  public boolean isErased() {
+    return isErased;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
new file mode 100644
index 0000000..91e4fb8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+/**
+ * A group of blocks or {@link ECBlock} incurred in an erasure coding task.
+ */
+public class ECBlockGroup {
+
+  private ECBlock[] dataBlocks;
+  private ECBlock[] parityBlocks;
+
+  /**
+   * A constructor specifying data blocks and parity blocks.
+   * @param dataBlocks data blocks in the group
+   * @param parityBlocks parity blocks in the group
+   */
+  public ECBlockGroup(ECBlock[] dataBlocks, ECBlock[] parityBlocks) {
+    this.dataBlocks = dataBlocks;
+    this.parityBlocks = parityBlocks;
+  }
+
+  /**
+   * Get data blocks
+   * @return data blocks
+   */
+  public ECBlock[] getDataBlocks() {
+    return dataBlocks;
+  }
+
+  /**
+   * Get parity blocks
+   * @return parity blocks
+   */
+  public ECBlock[] getParityBlocks() {
+    return parityBlocks;
+  }
+
+  /**
+   * Any erased data block?
+   * @return true if any erased data block, false otherwise
+   */
+  public boolean anyErasedDataBlock() {
+    for (int i = 0; i < dataBlocks.length; ++i) {
+      if (dataBlocks[i].isErased()) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Any erased parity block?
+   * @return true if any erased parity block, false otherwise
+   */
+  public boolean anyErasedParityBlock() {
+    for (int i = 0; i < parityBlocks.length; ++i) {
+      if (parityBlocks[i].isErased()) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Get erased blocks count
+   * @return erased count of blocks
+   */
+  public int getErasedCount() {
+    int erasedCount = 0;
+
+    for (ECBlock dataBlock : dataBlocks) {
+      if (dataBlock.isErased()) erasedCount++;
+    }
+
+    for (ECBlock parityBlock : parityBlocks) {
+      if (parityBlock.isErased()) erasedCount++;
+    }
+
+    return erasedCount;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java
new file mode 100644
index 0000000..d0120d8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+import java.nio.ByteBuffer;
+
+/**
+ * A wrapper for ByteBuffer or bytes array for an erasure code chunk.
+ */
+public class ECChunk {
+
+  private ByteBuffer chunkBuffer;
+
+  /**
+   * Wrapping a ByteBuffer
+   * @param buffer buffer to be wrapped by the chunk
+   */
+  public ECChunk(ByteBuffer buffer) {
+    this.chunkBuffer = buffer;
+  }
+
+  /**
+   * Wrapping a bytes array
+   * @param buffer buffer to be wrapped by the chunk
+   */
+  public ECChunk(byte[] buffer) {
+    this.chunkBuffer = ByteBuffer.wrap(buffer);
+  }
+
+  /**
+   * Convert to ByteBuffer
+   * @return ByteBuffer
+   */
+  public ByteBuffer getBuffer() {
+    return chunkBuffer;
+  }
+
+  /**
+   * Convert an array of this chunks to an array of ByteBuffers
+   * @param chunks chunks to convert into buffers
+   * @return an array of ByteBuffers
+   */
+  public static ByteBuffer[] toBuffers(ECChunk[] chunks) {
+    ByteBuffer[] buffers = new ByteBuffer[chunks.length];
+
+    ECChunk chunk;
+    for (int i = 0; i < chunks.length; i++) {
+      chunk = chunks[i];
+      if (chunk == null) {
+        buffers[i] = null;
+      } else {
+        buffers[i] = chunk.getBuffer();
+      }
+    }
+
+    return buffers;
+  }
+
+  /**
+   * Convert to a bytes array, just for test usage.
+   * @return bytes array
+   */
+  public byte[] toBytesArray() {
+    byte[] bytesArr = new byte[chunkBuffer.remaining()];
+    // Avoid affecting the original one
+    chunkBuffer.mark();
+    chunkBuffer.get(bytesArr);
+    chunkBuffer.reset();
+
+    return bytesArr;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java
new file mode 100644
index 0000000..fb02476
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java
@@ -0,0 +1,257 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Erasure coding schema to housekeeper relevant information.
+ */
+public final class ECSchema {
+  public static final String NUM_DATA_UNITS_KEY = "numDataUnits";
+  public static final String NUM_PARITY_UNITS_KEY = "numParityUnits";
+  public static final String CODEC_NAME_KEY = "codec";
+
+  /**
+   * A friendly and understandable name that can mean what's it, also serves as
+   * the identifier that distinguish it from other schemas.
+   */
+  private final String schemaName;
+
+  /**
+   * The erasure codec name associated.
+   */
+  private final String codecName;
+
+  /**
+   * Number of source data units coded
+   */
+  private final int numDataUnits;
+
+  /**
+   * Number of parity units generated in a coding
+   */
+  private final int numParityUnits;
+
+  /*
+   * An erasure code can have its own specific advanced parameters, subject to
+   * itself to interpret these key-value settings.
+   */
+  private final Map<String, String> extraOptions;
+
+  /**
+   * Constructor with schema name and provided all options. Note the options may
+   * contain additional information for the erasure codec to interpret further.
+   * @param schemaName schema name
+   * @param allOptions all schema options
+   */
+  public ECSchema(String schemaName, Map<String, String> allOptions) {
+    assert (schemaName != null && ! schemaName.isEmpty());
+
+    this.schemaName = schemaName;
+
+    if (allOptions == null || allOptions.isEmpty()) {
+      throw new IllegalArgumentException("No schema options are provided");
+    }
+
+    this.codecName = allOptions.get(CODEC_NAME_KEY);
+    if (codecName == null || codecName.isEmpty()) {
+      throw new IllegalArgumentException("No codec option is provided");
+    }
+
+    int tmpNumDataUnits = extractIntOption(NUM_DATA_UNITS_KEY, allOptions);
+    int tmpNumParityUnits = extractIntOption(NUM_PARITY_UNITS_KEY, allOptions);
+    if (tmpNumDataUnits < 0 || tmpNumParityUnits < 0) {
+      throw new IllegalArgumentException(
+          "No good option for numDataUnits or numParityUnits found ");
+    }
+    this.numDataUnits = tmpNumDataUnits;
+    this.numParityUnits = tmpNumParityUnits;
+
+    allOptions.remove(CODEC_NAME_KEY);
+    allOptions.remove(NUM_DATA_UNITS_KEY);
+    allOptions.remove(NUM_PARITY_UNITS_KEY);
+    // After some cleanup
+    this.extraOptions = Collections.unmodifiableMap(allOptions);
+  }
+
+  /**
+   * Constructor with key parameters provided.
+   * @param schemaName schema name
+   * @param codecName codec name
+   * @param numDataUnits number of data units used in the schema
+   * @param numParityUnits number os parity units used in the schema
+   */
+  public ECSchema(String schemaName, String codecName,
+                  int numDataUnits, int numParityUnits) {
+    this(schemaName, codecName, numDataUnits, numParityUnits, null);
+  }
+
+  /**
+   * Constructor with key parameters provided. Note the extraOptions may contain
+   * additional information for the erasure codec to interpret further.
+   * @param schemaName schema name
+   * @param codecName codec name
+   * @param numDataUnits number of data units used in the schema
+   * @param numParityUnits number os parity units used in the schema
+   * @param extraOptions extra options to configure the codec
+   */
+  public ECSchema(String schemaName, String codecName, int numDataUnits,
+                  int numParityUnits, Map<String, String> extraOptions) {
+
+    assert (schemaName != null && ! schemaName.isEmpty());
+    assert (codecName != null && ! codecName.isEmpty());
+    assert (numDataUnits > 0 && numParityUnits > 0);
+
+    this.schemaName = schemaName;
+    this.codecName = codecName;
+    this.numDataUnits = numDataUnits;
+    this.numParityUnits = numParityUnits;
+
+    if (extraOptions == null) {
+      extraOptions = new HashMap<>();
+    }
+
+    // After some cleanup
+    this.extraOptions = Collections.unmodifiableMap(extraOptions);
+  }
+
+  private int extractIntOption(String optionKey, Map<String, String> options) {
+    int result = -1;
+
+    try {
+      if (options.containsKey(optionKey)) {
+        result = Integer.parseInt(options.get(optionKey));
+        if (result <= 0) {
+          throw new IllegalArgumentException("Bad option value " + result +
+              " found for " + optionKey);
+        }
+      }
+    } catch (NumberFormatException e) {
+      throw new IllegalArgumentException("Option value " +
+          options.get(optionKey) + " for " + optionKey +
+          " is found. It should be an integer");
+    }
+
+    return result;
+  }
+
+  /**
+   * Get the schema name
+   * @return schema name
+   */
+  public String getSchemaName() {
+    return schemaName;
+  }
+
+  /**
+   * Get the codec name
+   * @return codec name
+   */
+  public String getCodecName() {
+    return codecName;
+  }
+
+  /**
+   * Get extra options specific to a erasure code.
+   * @return extra options
+   */
+  public Map<String, String> getExtraOptions() {
+    return extraOptions;
+  }
+
+  /**
+   * Get required data units count in a coding group
+   * @return count of data units
+   */
+  public int getNumDataUnits() {
+    return numDataUnits;
+  }
+
+  /**
+   * Get required parity units count in a coding group
+   * @return count of parity units
+   */
+  public int getNumParityUnits() {
+    return numParityUnits;
+  }
+
+  /**
+   * Make a meaningful string representation for log output.
+   * @return string representation
+   */
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("ECSchema=[");
+
+    sb.append("Name=" + schemaName + ", ");
+    sb.append("Codec=" + codecName + ", ");
+    sb.append(NUM_DATA_UNITS_KEY + "=" + numDataUnits + ", ");
+    sb.append(NUM_PARITY_UNITS_KEY + "=" + numParityUnits);
+    sb.append((extraOptions.isEmpty() ? "" : ", "));
+
+    int i = 0;
+    for (String opt : extraOptions.keySet()) {
+      sb.append(opt + "=" + extraOptions.get(opt) +
+          (++i < extraOptions.size() ? ", " : ""));
+    }
+
+    sb.append("]");
+
+    return sb.toString();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+
+    ECSchema ecSchema = (ECSchema) o;
+
+    if (numDataUnits != ecSchema.numDataUnits) {
+      return false;
+    }
+    if (numParityUnits != ecSchema.numParityUnits) {
+      return false;
+    }
+    if (!schemaName.equals(ecSchema.schemaName)) {
+      return false;
+    }
+    if (!codecName.equals(ecSchema.codecName)) {
+      return false;
+    }
+    return extraOptions.equals(ecSchema.extraOptions);
+  }
+
+  @Override
+  public int hashCode() {
+    int result = schemaName.hashCode();
+    result = 31 * result + codecName.hashCode();
+    result = 31 * result + extraOptions.hashCode();
+    result = 31 * result + numDataUnits;
+    result = 31 * result + numParityUnits;
+
+    return result;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java
new file mode 100644
index 0000000..fce46f8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Text;
+import org.xml.sax.SAXException;
+
+/**
+ * A EC schema loading utility that loads predefined EC schemas from XML file
+ */
+public class SchemaLoader {
+  private static final Logger LOG = LoggerFactory.getLogger(
+      SchemaLoader.class.getName());
+
+  /**
+   * Load predefined ec schemas from configuration file. This file is
+   * expected to be in the XML format.
+   */
+  public List<ECSchema> loadSchema(String schemaFilePath) {
+    File confFile = getSchemaFile(schemaFilePath);
+    if (confFile == null) {
+      LOG.warn("Not found any predefined EC schema file");
+      return Collections.emptyList();
+    }
+
+    try {
+      return loadSchema(confFile);
+    } catch (ParserConfigurationException e) {
+      throw new RuntimeException("Failed to load schema file: " + confFile);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to load schema file: " + confFile);
+    } catch (SAXException e) {
+      throw new RuntimeException("Failed to load schema file: " + confFile);
+    }
+  }
+
+  private List<ECSchema> loadSchema(File schemaFile)
+      throws ParserConfigurationException, IOException, SAXException {
+
+    LOG.info("Loading predefined EC schema file {}", schemaFile);
+
+    // Read and parse the schema file.
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+    dbf.setIgnoringComments(true);
+    DocumentBuilder builder = dbf.newDocumentBuilder();
+    Document doc = builder.parse(schemaFile);
+    Element root = doc.getDocumentElement();
+
+    if (!"schemas".equals(root.getTagName())) {
+      throw new RuntimeException("Bad EC schema config file: " +
+          "top-level element not <schemas>");
+    }
+
+    NodeList elements = root.getChildNodes();
+    List<ECSchema> schemas = new ArrayList<ECSchema>();
+    for (int i = 0; i < elements.getLength(); i++) {
+      Node node = elements.item(i);
+      if (node instanceof Element) {
+        Element element = (Element) node;
+        if ("schema".equals(element.getTagName())) {
+          ECSchema schema = loadSchema(element);
+            schemas.add(schema);
+        } else {
+          LOG.warn("Bad element in EC schema configuration file: {}",
+              element.getTagName());
+        }
+      }
+    }
+
+    return schemas;
+  }
+
+  /**
+   * Path to the XML file containing predefined ec schemas. If the path is
+   * relative, it is searched for in the classpath.
+   */
+  private File getSchemaFile(String schemaFilePath) {
+    File schemaFile = new File(schemaFilePath);
+    if (! schemaFile.isAbsolute()) {
+      URL url = Thread.currentThread().getContextClassLoader()
+          .getResource(schemaFilePath);
+      if (url == null) {
+        LOG.warn("{} not found on the classpath.", schemaFilePath);
+        schemaFile = null;
+      } else if (! url.getProtocol().equalsIgnoreCase("file")) {
+        throw new RuntimeException(
+            "EC predefined schema file " + url +
+                " found on the classpath is not on the local filesystem.");
+      } else {
+        schemaFile = new File(url.getPath());
+      }
+    }
+
+    return schemaFile;
+  }
+
+  /**
+   * Loads a schema from a schema element in the configuration file
+   */
+  private ECSchema loadSchema(Element element) {
+    String schemaName = element.getAttribute("name");
+    Map<String, String> ecOptions = new HashMap<String, String>();
+    NodeList fields = element.getChildNodes();
+
+    for (int i = 0; i < fields.getLength(); i++) {
+      Node fieldNode = fields.item(i);
+      if (fieldNode instanceof Element) {
+        Element field = (Element) fieldNode;
+        String tagName = field.getTagName();
+        String value = ((Text) field.getFirstChild()).getData().trim();
+        ecOptions.put(tagName, value);
+      }
+    }
+
+    ECSchema schema = new ECSchema(schemaName, ecOptions);
+    return schema;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
new file mode 100644
index 0000000..0cacfbc
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
+
+/**
+ * Abstract Erasure Codec that implements {@link ErasureCodec}.
+ */
+public abstract class AbstractErasureCodec extends Configured
+    implements ErasureCodec {
+
+  private final ECSchema schema;
+
+  public AbstractErasureCodec(ECSchema schema) {
+    this.schema = schema;
+  }
+
+  public String getName() {
+    return schema.getCodecName();
+  }
+
+  public ECSchema getSchema() {
+    return schema;
+  }
+
+  @Override
+  public BlockGrouper createBlockGrouper() {
+    BlockGrouper blockGrouper = new BlockGrouper();
+    blockGrouper.setSchema(getSchema());
+
+    return blockGrouper;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
new file mode 100644
index 0000000..9aa3db2
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
+
+/**
+ * Erasure Codec API that's to cover the essential specific aspects of a code.
+ * Currently it cares only block grouper and erasure coder. In future we may
+ * add more aspects here to make the behaviors customizable.
+ */
+public interface ErasureCodec extends Configurable {
+
+  /**
+   * Create block grouper
+   * @return block grouper
+   */
+  public BlockGrouper createBlockGrouper();
+
+  /**
+   * Create Erasure Encoder
+   * @return erasure encoder
+   */
+  public ErasureCoder createEncoder();
+
+  /**
+   * Create Erasure Decoder
+   * @return erasure decoder
+   */
+  public ErasureCoder createDecoder();
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
new file mode 100644
index 0000000..6edd638
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder;
+import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder;
+
+/**
+ * A Reed-Solomon erasure codec.
+ */
+public class RSErasureCodec extends AbstractErasureCodec {
+
+  public RSErasureCodec(ECSchema schema) {
+    super(schema);
+  }
+
+  @Override
+  public ErasureCoder createEncoder() {
+    return new RSErasureEncoder(getSchema());
+  }
+
+  @Override
+  public ErasureCoder createDecoder() {
+    return new RSErasureDecoder(getSchema());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
new file mode 100644
index 0000000..e2dcfa7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.codec;
+
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
+import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder;
+import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder;
+
+/**
+ * A XOR erasure codec.
+ */
+public class XORErasureCodec extends AbstractErasureCodec {
+
+  public XORErasureCodec(ECSchema schema) {
+    super(schema);
+    assert(schema.getNumParityUnits() == 1);
+  }
+
+  @Override
+  public ErasureCoder createEncoder() {
+    return new XORErasureEncoder(getSchema());
+  }
+
+  @Override
+  public ErasureCoder createDecoder() {
+    return new XORErasureDecoder(getSchema());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
new file mode 100644
index 0000000..5cd0ee8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+
+/**
+ * A common class of basic facilities to be shared by encoder and decoder
+ *
+ * It implements the {@link ErasureCoder} interface.
+ */
+public abstract class AbstractErasureCoder
+    extends Configured implements ErasureCoder {
+
+  private final int numDataUnits;
+  private final int numParityUnits;
+
+  public AbstractErasureCoder(int numDataUnits, int numParityUnits) {
+    this.numDataUnits = numDataUnits;
+    this.numParityUnits = numParityUnits;
+  }
+
+  public AbstractErasureCoder(ECSchema schema) {
+    this(schema.getNumDataUnits(), schema.getNumParityUnits());
+  }
+
+  @Override
+  public int getNumDataUnits() {
+    return numDataUnits;
+  }
+
+  @Override
+  public int getNumParityUnits() {
+    return numParityUnits;
+  }
+
+  @Override
+  public boolean preferDirectBuffer() {
+    return false;
+  }
+
+  @Override
+  public void release() {
+    // Nothing to do by default
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java
new file mode 100644
index 0000000..c429d49
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+
+/**
+ * Abstract class for common facilities shared by {@link ErasureEncodingStep}
+ * and {@link ErasureDecodingStep}.
+ *
+ * It implements {@link ErasureEncodingStep}.
+ */
+public abstract class AbstractErasureCodingStep implements ErasureCodingStep {
+
+  private ECBlock[] inputBlocks;
+  private ECBlock[] outputBlocks;
+
+  /**
+   * Constructor given input blocks and output blocks.
+   * @param inputBlocks
+   * @param outputBlocks
+   */
+  public AbstractErasureCodingStep(ECBlock[] inputBlocks,
+                                   ECBlock[] outputBlocks) {
+    this.inputBlocks = inputBlocks;
+    this.outputBlocks = outputBlocks;
+  }
+
+  @Override
+  public ECBlock[] getInputBlocks() {
+    return inputBlocks;
+  }
+
+  @Override
+  public ECBlock[] getOutputBlocks() {
+    return outputBlocks;
+  }
+
+  @Override
+  public void finish() {
+    // NOOP by default
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java
new file mode 100644
index 0000000..3ea9311
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+
+/**
+ * An abstract erasure decoder that's to be inherited by new decoders.
+ *
+ * It implements the {@link ErasureCoder} interface.
+ */
+public abstract class AbstractErasureDecoder extends AbstractErasureCoder {
+
+  public AbstractErasureDecoder(int numDataUnits, int numParityUnits) {
+    super(numDataUnits, numParityUnits);
+  }
+
+  public AbstractErasureDecoder(ECSchema schema) {
+    super(schema);
+  }
+
+  @Override
+  public ErasureCodingStep calculateCoding(ECBlockGroup blockGroup) {
+    // We may have more than this when considering complicate cases. HADOOP-11550
+    return prepareDecodingStep(blockGroup);
+  }
+
+  /**
+   * Perform decoding against a block blockGroup.
+   * @param blockGroup
+   * @return decoding step for caller to do the real work
+   */
+  protected abstract ErasureCodingStep prepareDecodingStep(
+      ECBlockGroup blockGroup);
+
+  /**
+   * We have all the data blocks and parity blocks as input blocks for
+   * recovering by default. It's codec specific
+   * @param blockGroup
+   * @return
+   */
+  protected ECBlock[] getInputBlocks(ECBlockGroup blockGroup) {
+    ECBlock[] inputBlocks = new ECBlock[getNumParityUnits()
+        + getNumDataUnits()];
+
+    System.arraycopy(blockGroup.getParityBlocks(), 0, inputBlocks, 0,
+        getNumParityUnits());
+    System.arraycopy(blockGroup.getDataBlocks(), 0, inputBlocks,
+        getNumParityUnits(), getNumDataUnits());
+
+    return inputBlocks;
+  }
+
+  /**
+   * Which blocks were erased ?
+   * @param blockGroup
+   * @return output blocks to recover
+   */
+  protected ECBlock[] getOutputBlocks(ECBlockGroup blockGroup) {
+    ECBlock[] outputBlocks = new ECBlock[getNumErasedBlocks(blockGroup)];
+
+    int idx = 0;
+
+    for (int i = 0; i < getNumParityUnits(); i++) {
+      if (blockGroup.getParityBlocks()[i].isErased()) {
+        outputBlocks[idx++] = blockGroup.getParityBlocks()[i];
+      }
+    }
+
+    for (int i = 0; i < getNumDataUnits(); i++) {
+      if (blockGroup.getDataBlocks()[i].isErased()) {
+        outputBlocks[idx++] = blockGroup.getDataBlocks()[i];
+      }
+    }
+
+    return outputBlocks;
+  }
+
+  /**
+   * Get the number of erased blocks in the block group.
+   * @param blockGroup
+   * @return number of erased blocks
+   */
+  protected int getNumErasedBlocks(ECBlockGroup blockGroup) {
+    int num = getNumErasedBlocks(blockGroup.getParityBlocks());
+    num += getNumErasedBlocks(blockGroup.getDataBlocks());
+    return num;
+  }
+
+  /**
+   * Find out how many blocks are erased.
+   * @param inputBlocks all the input blocks
+   * @return number of erased blocks
+   */
+  protected static int getNumErasedBlocks(ECBlock[] inputBlocks) {
+    int numErased = 0;
+    for (int i = 0; i < inputBlocks.length; i++) {
+      if (inputBlocks[i].isErased()) {
+        numErased ++;
+      }
+    }
+
+    return numErased;
+  }
+
+  /**
+   * Get indexes of erased blocks from inputBlocks
+   * @param inputBlocks
+   * @return indexes of erased blocks from inputBlocks
+   */
+  protected int[] getErasedIndexes(ECBlock[] inputBlocks) {
+    int numErased = getNumErasedBlocks(inputBlocks);
+    if (numErased == 0) {
+      return new int[0];
+    }
+
+    int[] erasedIndexes = new int[numErased];
+    int i = 0, j = 0;
+    for (; i < inputBlocks.length && j < erasedIndexes.length; i++) {
+      if (inputBlocks[i].isErased()) {
+        erasedIndexes[j++] = i;
+      }
+    }
+
+    return erasedIndexes;
+  }
+
+  /**
+   * Get erased input blocks from inputBlocks
+   * @param inputBlocks
+   * @return an array of erased blocks from inputBlocks
+   */
+  protected ECBlock[] getErasedBlocks(ECBlock[] inputBlocks) {
+    int numErased = getNumErasedBlocks(inputBlocks);
+    if (numErased == 0) {
+      return new ECBlock[0];
+    }
+
+    ECBlock[] erasedBlocks = new ECBlock[numErased];
+    int i = 0, j = 0;
+    for (; i < inputBlocks.length && j < erasedBlocks.length; i++) {
+      if (inputBlocks[i].isErased()) {
+        erasedBlocks[j++] = inputBlocks[i];
+      }
+    }
+
+    return erasedBlocks;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java
new file mode 100644
index 0000000..7c887e8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+
+/**
+ * An abstract erasure encoder that's to be inherited by new encoders.
+ *
+ * It implements the {@link ErasureCoder} interface.
+ */
+public abstract class AbstractErasureEncoder extends AbstractErasureCoder {
+
+  public AbstractErasureEncoder(int numDataUnits, int numParityUnits) {
+    super(numDataUnits, numParityUnits);
+  }
+
+  public AbstractErasureEncoder(ECSchema schema) {
+    super(schema);
+  }
+
+  @Override
+  public ErasureCodingStep calculateCoding(ECBlockGroup blockGroup) {
+    // We may have more than this when considering complicate cases. HADOOP-11550
+    return prepareEncodingStep(blockGroup);
+  }
+
+  /**
+   * Perform encoding against a block group.
+   * @param blockGroup
+   * @return encoding step for caller to do the real work
+   */
+  protected abstract ErasureCodingStep prepareEncodingStep(
+      ECBlockGroup blockGroup);
+
+  protected ECBlock[] getInputBlocks(ECBlockGroup blockGroup) {
+    return blockGroup.getDataBlocks();
+  }
+
+  protected ECBlock[] getOutputBlocks(ECBlockGroup blockGroup) {
+    return blockGroup.getParityBlocks();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
new file mode 100644
index 0000000..f05ea41
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+
+/**
+ * An erasure coder to perform encoding or decoding given a group. Generally it
+ * involves calculating necessary internal steps according to codec logic. For
+ * each step,it calculates necessary input blocks to read chunks from and output
+ * parity blocks to write parity chunks into from the group. It also takes care
+ * of appropriate raw coder to use for the step. And encapsulates all the
+ * necessary info (input blocks, output blocks and raw coder) into a step
+ * represented by {@link ErasureCodingStep}. ErasureCoder callers can use the
+ * step to do the real work with retrieved input and output chunks.
+ *
+ * Note, currently only one coding step is supported. Will support complex cases
+ * of multiple coding steps.
+ *
+ */
+public interface ErasureCoder extends Configurable {
+
+  /**
+   * The number of data input units for the coding. A unit can be a byte,
+   * chunk or buffer or even a block.
+   * @return count of data input units
+   */
+  public int getNumDataUnits();
+
+  /**
+   * The number of parity output units for the coding. A unit can be a byte,
+   * chunk, buffer or even a block.
+   * @return count of parity output units
+   */
+  public int getNumParityUnits();
+
+  /**
+   * Calculate the encoding or decoding steps given a block blockGroup.
+   *
+   * Note, currently only one coding step is supported. Will support complex
+   * cases of multiple coding steps.
+   *
+   * @param blockGroup the erasure coding block group containing all necessary
+   *                   information for codec calculation
+   */
+  public ErasureCodingStep calculateCoding(ECBlockGroup blockGroup);
+
+  /**
+   * Tell if direct or off-heap buffer is preferred or not. It's for callers to
+   * decide how to allocate coding chunk buffers, either on heap or off heap.
+   * It will return false by default.
+   * @return true if direct buffer is preferred for performance consideration,
+   * otherwise false.
+   */
+  public boolean preferDirectBuffer();
+
+  /**
+   * Release the resources if any. Good chance to invoke RawErasureCoder#release.
+   */
+  public void release();
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java
new file mode 100644
index 0000000..a3b177f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECChunk;
+
+/**
+ * Erasure coding step that's involved in encoding/decoding of a block group.
+ */
+public interface ErasureCodingStep {
+
+  /**
+   * Input blocks of readable data involved in this step, may be data blocks
+   * or parity blocks.
+   * @return input blocks
+   */
+  public ECBlock[] getInputBlocks();
+
+  /**
+   * Output blocks of writable buffers involved in this step, may be data
+   * blocks or parity blocks.
+   * @return output blocks
+   */
+  public ECBlock[] getOutputBlocks();
+
+  /**
+   * Perform encoding or decoding given the input chunks, and generated results
+   * will be written to the output chunks.
+   * @param inputChunks
+   * @param outputChunks
+   */
+  public void performCoding(ECChunk[] inputChunks, ECChunk[] outputChunks);
+
+  /**
+   * Notify erasure coder that all the chunks of input blocks are processed so
+   * the coder can be able to update internal states, considering next step.
+   */
+  public void finish();
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java
new file mode 100644
index 0000000..980c580
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECChunk;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
+
+/**
+ * Erasure decoding step, a wrapper of all the necessary information to perform
+ * a decoding step involved in the whole process of decoding a block group.
+ */
+public class ErasureDecodingStep extends AbstractErasureCodingStep {
+  private int[] erasedIndexes;
+  private RawErasureDecoder rawDecoder;
+
+  /**
+   * The constructor with all the necessary info.
+   * @param inputBlocks
+   * @param erasedIndexes the indexes of erased blocks in inputBlocks array
+   * @param outputBlocks
+   * @param rawDecoder
+   */
+  public ErasureDecodingStep(ECBlock[] inputBlocks, int[] erasedIndexes,
+                             ECBlock[] outputBlocks,
+                             RawErasureDecoder rawDecoder) {
+    super(inputBlocks, outputBlocks);
+    this.erasedIndexes = erasedIndexes;
+    this.rawDecoder = rawDecoder;
+  }
+
+  @Override
+  public void performCoding(ECChunk[] inputChunks, ECChunk[] outputChunks) {
+    rawDecoder.decode(inputChunks, erasedIndexes, outputChunks);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java
new file mode 100644
index 0000000..bd7587f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECChunk;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder;
+
+/**
+ * Erasure encoding step, a wrapper of all the necessary information to perform
+ * an encoding step involved in the whole process of encoding a block group.
+ */
+public class ErasureEncodingStep extends AbstractErasureCodingStep {
+
+  private RawErasureEncoder rawEncoder;
+
+  /**
+   * The constructor with all the necessary info.
+   * @param inputBlocks
+   * @param outputBlocks
+   * @param rawEncoder
+   */
+  public ErasureEncodingStep(ECBlock[] inputBlocks, ECBlock[] outputBlocks,
+                             RawErasureEncoder rawEncoder) {
+    super(inputBlocks, outputBlocks);
+    this.rawEncoder = rawEncoder;
+  }
+
+  @Override
+  public void performCoding(ECChunk[] inputChunks, ECChunk[] outputChunks) {
+    rawEncoder.encode(inputChunks, outputChunks);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/dc0a6173/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java
new file mode 100644
index 0000000..f56674d
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.coder;
+
+import org.apache.hadoop.io.erasurecode.CodecUtil;
+import org.apache.hadoop.io.erasurecode.ECBlock;
+import org.apache.hadoop.io.erasurecode.ECBlockGroup;
+import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
+
+/**
+ * Reed-Solomon erasure decoder that decodes a block group.
+ *
+ * It implements {@link ErasureCoder}.
+ */
+public class RSErasureDecoder extends AbstractErasureDecoder {
+  private RawErasureDecoder rsRawDecoder;
+
+  public RSErasureDecoder(int numDataUnits, int numParityUnits) {
+    super(numDataUnits, numParityUnits);
+  }
+
+  public RSErasureDecoder(ECSchema schema) {
+    super(schema);
+  }
+
+  @Override
+  protected ErasureCodingStep prepareDecodingStep(final ECBlockGroup blockGroup) {
+
+    ECBlock[] inputBlocks = getInputBlocks(blockGroup);
+    ECBlock[] outputBlocks = getOutputBlocks(blockGroup);
+
+    RawErasureDecoder rawDecoder = checkCreateRSRawDecoder();
+    return new ErasureDecodingStep(inputBlocks,
+        getErasedIndexes(inputBlocks), outputBlocks, rawDecoder);
+  }
+
+  private RawErasureDecoder checkCreateRSRawDecoder() {
+    if (rsRawDecoder == null) {
+      rsRawDecoder = CodecUtil.createRSRawDecoder(getConf(),
+          getNumDataUnits(), getNumParityUnits());
+    }
+    return rsRawDecoder;
+  }
+
+  @Override
+  public void release() {
+    if (rsRawDecoder != null) {
+      rsRawDecoder.release();
+    }
+  }
+}


Mime
View raw message