From: zjshen@apache.org
To: common-commits@hadoop.apache.org
Date: Thu, 18 Jun 2015 18:25:08 -0000
Subject: [30/50] [abbrv] hadoop git commit: HDFS-4660. Block corruption can
 happen during pipeline recovery. Contributed by Kihwal Lee.

HDFS-4660. Block corruption can happen during pipeline recovery. Contributed by Kihwal Lee.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b5097681
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b5097681
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b5097681

Branch: refs/heads/YARN-2928
Commit: b5097681edbadec0f860a6bbcc6672a3c3169404
Parents: 5e962f6
Author: Kihwal Lee
Authored: Tue Jun 16 15:39:46 2015 -0500
Committer: Zhijie Shen
Committed: Thu Jun 18 11:18:58 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |   2 +
 .../hdfs/server/datanode/BlockReceiver.java     | 126 ++++++++++++++-----
 2 files changed, 96 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b5097681/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 42588cc..b921f2c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1026,6 +1026,8 @@ Release 2.7.1 - UNRELEASED
 
     HDFS-8597. Fix TestFSImage#testZeroBlockSize on Windows. (Xiaoyu Yao)
 
+    HDFS-4660. Block corruption can happen during pipeline recovery
+    (kihwal)
 
 Release 2.7.0 - 2015-04-20
 
   INCOMPATIBLE CHANGES


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b5097681/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
index c46892d..2468f43 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
@@ -588,29 +588,59 @@ class BlockReceiver implements Closeable {
     try {
       long onDiskLen = replicaInfo.getBytesOnDisk();
       if (onDiskLen < offsetInBlock) {
-        //finally write to the disk :
-
-        if (onDiskLen % bytesPerChecksum != 0) {
-          // prepare to overwrite last checksum
-          adjustCrcFilePosition();
-        }
+        // Normally the beginning of an incoming packet is aligned with
+        // the existing data on disk. When a failure-recovery is involved,
+        // the client state and the datanode state may not exactly agree,
+        // i.e. the client may resend part of data that is already on
+        // disk. The correct number of bytes must be skipped when writing
+        // the data and checksum buffers out to disk.
+        long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
+        boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
+        boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
+
+        // Since data is always appended, not overwritten, partial CRC
+        // recalculation is necessary if the on-disk data is not chunk-
+        // aligned, regardless of whether the beginning of the data in
+        // the packet is chunk-aligned.
+        boolean doPartialCrc = !alignedOnDisk && !shouldNotWriteChecksum;
+
+        // If this is a partial chunk, then verify that this is the only
+        // chunk in the packet. If the starting offset is not chunk
+        // aligned, the packet should terminate at or before the next
+        // chunk boundary.
+        if (!alignedInPacket && len > bytesPerChecksum) {
+          throw new IOException("Unexpected packet data length for "
+              + block + " from " + inAddr + ": a partial chunk must be "
+              + " sent in an individual packet (data length = " + len
+              + " > bytesPerChecksum = " + bytesPerChecksum + ")");
+        }
 
-        // If this is a partial chunk, then read in pre-existing checksum
+        // If the last portion of the block file is not a full chunk,
+        // then read in pre-existing partial data chunk and recalculate
+        // the checksum so that the checksum calculation can continue
+        // from the right state.
         Checksum partialCrc = null;
-        if (!shouldNotWriteChecksum && firstByteInBlock % bytesPerChecksum != 0) {
+        if (doPartialCrc) {
           if (LOG.isDebugEnabled()) {
             LOG.debug("receivePacket for " + block
-                + ": bytesPerChecksum=" + bytesPerChecksum
-                + " does not divide firstByteInBlock=" + firstByteInBlock);
+                + ": previous write did not end at the chunk boundary."
+                + " onDiskLen=" + onDiskLen);
           }
           long offsetInChecksum = BlockMetadataHeader.getHeaderSize() +
               onDiskLen / bytesPerChecksum * checksumSize;
           partialCrc = computePartialChunkCrc(onDiskLen, offsetInChecksum);
         }
 
+        // The data buffer position where write will begin. If the packet
+        // data and on-disk data have no overlap, this will not be at the
+        // beginning of the buffer.
         int startByteToDisk = (int)(onDiskLen-firstByteInBlock)
             + dataBuf.arrayOffset() + dataBuf.position();
 
+        // Actual number of data bytes to write.
         int numBytesToDisk = (int)(offsetInBlock-onDiskLen);
 
         // Write data to disk.
@@ -625,31 +655,63 @@
       final byte[] lastCrc;
       if (shouldNotWriteChecksum) {
         lastCrc = null;
-      } else if (partialCrc != null) {
-        // If this is a partial chunk, then verify that this is the only
-        // chunk in the packet. Calculate new crc for this chunk.
-        if (len > bytesPerChecksum) {
-          throw new IOException("Unexpected packet data length for "
-              + block + " from " + inAddr + ": a partial chunk must be "
-              + " sent in an individual packet (data length = " + len
-              + " > bytesPerChecksum = " + bytesPerChecksum + ")");
+      } else {
+        int skip = 0;
+        byte[] crcBytes = null;
+
+        // First, overwrite the partial crc at the end, if necessary.
+        if (doPartialCrc) { // not chunk-aligned on disk
+          // Calculate new crc for this chunk.
+          int bytesToReadForRecalc =
+              (int)(bytesPerChecksum - partialChunkSizeOnDisk);
+          if (numBytesToDisk < bytesToReadForRecalc) {
+            bytesToReadForRecalc = numBytesToDisk;
+          }
+
+          partialCrc.update(dataBuf.array(), startByteToDisk,
+              bytesToReadForRecalc);
+          byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
+              checksumSize);
+          crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
+          // prepare to overwrite last checksum
+          adjustCrcFilePosition();
+          checksumOut.write(buf);
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("Writing out partial crc for data len " + len
+                + ", skip=" + skip);
+          }
+          skip++; // For the partial chunk that was just read.
         }
-        partialCrc.update(dataBuf.array(), startByteToDisk, numBytesToDisk);
-        byte[] buf = FSOutputSummer.convertToByteStream(partialCrc, checksumSize);
-        lastCrc = copyLastChunkChecksum(buf, checksumSize, buf.length);
-        checksumOut.write(buf);
-        if(LOG.isDebugEnabled()) {
-          LOG.debug("Writing out partial crc for data len " + len);
+
+        // Determine how many checksums need to be skipped up to the last
+        // boundary. The checksum after the boundary was already counted
+        // above. Only count the number of checksums skipped up to the
+        // boundary here.
+        long lastChunkBoundary = onDiskLen - (onDiskLen%bytesPerChecksum);
+        long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
+
+        if (skippedDataBytes > 0) {
+          skip += (int)(skippedDataBytes / bytesPerChecksum) +
+              ((skippedDataBytes % bytesPerChecksum == 0) ? 0 : 1);
         }
-        partialCrc = null;
-      } else {
-        // write checksum
+        skip *= checksumSize; // Convert to number of bytes
+
+        // write the rest of checksum
         final int offset = checksumBuf.arrayOffset() +
-            checksumBuf.position();
-        final int end = offset + checksumLen;
-        lastCrc = copyLastChunkChecksum(checksumBuf.array(), checksumSize,
-            end);
-        checksumOut.write(checksumBuf.array(), offset, checksumLen);
+            checksumBuf.position() + skip;
+        final int end = offset + checksumLen - skip;
+        // If offset > end, there is no more checksum to write.
+        // I.e. a partial chunk checksum rewrite happened and there is no
+        // more to write after that.
+        if (offset > end) {
+          assert crcBytes != null;
+          lastCrc = crcBytes;
+        } else {
+          final int remainingBytes = checksumLen - skip;
+          lastCrc = copyLastChunkChecksum(checksumBuf.array(),
+              checksumSize, end);
+          checksumOut.write(checksumBuf.array(), offset, remainingBytes);
+        }
       }
 
       /// flush entire packet, sync if requested
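
----------------------------------------------------------------------

A note on the mechanism for readers of the diff: the heart of the fix is
that when the bytes on disk do not end at a checksum-chunk boundary, the
CRC of that partial chunk is recomputed from the on-disk bytes
(computePartialChunkCrc) so that checksum calculation can resume
mid-chunk as new bytes arrive. The sketch below illustrates that
"resume a chunk checksum" idea using java.util.zip.CRC32. It is not HDFS
code (the datanode uses its own DataChecksum machinery), and the chunk
size and split point are made-up example values.

import java.util.zip.CRC32;

/** Standalone illustration of resuming a partial-chunk checksum. */
public class PartialChunkCrcSketch {
  public static void main(String[] args) {
    byte[] chunk = new byte[512];          // one full checksum chunk
    for (int i = 0; i < chunk.length; i++) {
      chunk[i] = (byte) i;
    }
    int onDiskPart = 164;                  // bytes already on disk

    // Conceptually what computePartialChunkCrc does: re-read the
    // partial chunk from disk into a fresh checksum object ...
    CRC32 partialCrc = new CRC32();
    partialCrc.update(chunk, 0, onDiskPart);

    // ... so that when the rest of the chunk arrives in a later
    // packet, the calculation continues from the right state.
    partialCrc.update(chunk, onDiskPart, chunk.length - onDiskPart);

    // The resumed CRC matches the CRC of the whole chunk.
    CRC32 fullCrc = new CRC32();
    fullCrc.update(chunk, 0, chunk.length);
    System.out.println(partialCrc.getValue() == fullCrc.getValue()); // true
  }
}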
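The second hunk's "skip" bookkeeping decides how much of the packet's
data and checksum content is already represented on disk and therefore
must not be written again. Below is a standalone walk-through of that
arithmetic with invented values (512-byte chunks, 4-byte CRCs); the
variable names mirror the patch, but this is an illustration, not
datanode code.

/** Standalone walk-through of the HDFS-4660 skip arithmetic. */
public class ChecksumSkipSketch {
  public static void main(String[] args) {
    final int bytesPerChecksum = 512;   // chunk size (example value)
    final int checksumSize = 4;         // bytes per CRC (example value)

    // A recovered pipeline resends a packet covering [1024, 2048),
    // but this datanode already has 1700 bytes of the block on disk.
    long firstByteInBlock = 1024;
    long offsetInBlock = 2048;
    long onDiskLen = 1700;

    long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum; // 164
    boolean doPartialCrc = partialChunkSizeOnDisk != 0;         // true

    // Only the tail of the packet that is new data gets written.
    int numBytesToDisk = (int) (offsetInBlock - onDiskLen);         // 348
    int startOffsetInPacket = (int) (onDiskLen - firstByteInBlock); // 676

    // Count checksums covering data that is already on disk.
    int skip = doPartialCrc ? 1 : 0;  // partial chunk handled separately
    long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;  // 1536
    long skippedDataBytes = lastChunkBoundary - firstByteInBlock; // 512
    if (skippedDataBytes > 0) {
      skip += (int) (skippedDataBytes / bytesPerChecksum)
          + ((skippedDataBytes % bytesPerChecksum == 0) ? 0 : 1);   // +1
    }
    int skipBytes = skip * checksumSize; // 8 checksum bytes to skip

    System.out.println("write " + numBytesToDisk + " data bytes from "
        + "packet offset " + startOffsetInPacket + ", skipping "
        + skipBytes + " checksum bytes");
  }
}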