Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 76B7A200BC2 for ; Thu, 17 Nov 2016 23:24:13 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 74E19160B0B; Thu, 17 Nov 2016 22:24:13 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 7189D160AD8 for ; Thu, 17 Nov 2016 23:24:12 +0100 (CET) Received: (qmail 51319 invoked by uid 500); 17 Nov 2016 22:24:11 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 51310 invoked by uid 99); 17 Nov 2016 22:24:11 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 17 Nov 2016 22:24:11 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 6BF10E0C0A; Thu, 17 Nov 2016 22:24:11 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: liuml07@apache.org To: common-commits@hadoop.apache.org Message-Id: <1c2686560e4b44f0aa7b12e930c2524c@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: HADOOP-11601. Enhance FS spec & tests to mandate FileStatus.getBlocksize() >0 for non-empty files. Contributed by Steve Loughran Date: Thu, 17 Nov 2016 22:24:11 +0000 (UTC) archived-at: Thu, 17 Nov 2016 22:24:13 -0000 Repository: hadoop Updated Branches: refs/heads/trunk bd3735554 -> ae8849fe3 HADOOP-11601. Enhance FS spec & tests to mandate FileStatus.getBlocksize() >0 for non-empty files. Contributed by Steve Loughran Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ae8849fe Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ae8849fe Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ae8849fe Branch: refs/heads/trunk Commit: ae8849fe378e11b9db642ef7784c8e6a08199b96 Parents: bd37355 Author: Mingliang Liu Authored: Wed Nov 16 15:04:30 2016 -0800 Committer: Mingliang Liu Committed: Thu Nov 17 14:11:38 2016 -0800 ---------------------------------------------------------------------- .../src/site/markdown/filesystem/filesystem.md | 17 ++-- .../fs/contract/AbstractContractCreateTest.java | 96 +++++++++++++++++++- .../hadoop/fs/contract/ContractTestUtils.java | 31 +++++++ 3 files changed, 134 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/ae8849fe/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index 063bd97..b18b5f6 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -78,6 +78,7 @@ Get the status of a path if isFile(FS, p) : stat.length = len(FS.Files[p]) stat.isdir = False + stat.blockSize > 0 elif isDir(FS, p) : stat.length = 0 stat.isdir = True @@ -451,13 +452,13 @@ split calculations to divide work optimally across a set of worker processes. #### Postconditions - result = integer >= 0 + result = integer > 0 Although there is no defined minimum value for this result, as it is used to partition work during job submission, a block size -that is too small will result in either too many jobs being submitted -for efficient work, or the `JobSubmissionClient` running out of memory. - +that is too small will result in badly partitioned workload, +or even the `JobSubmissionClient` and equivalent +running out of memory as it calculates the partitions. Any FileSystem that does not actually break files into blocks SHOULD return a number for this that results in efficient processing. @@ -503,12 +504,12 @@ on the filesystem. #### Postconditions - + if len(FS, P) > 0: getFileStatus(P).getBlockSize() > 0 result == getFileStatus(P).getBlockSize() -The outcome of this operation MUST be identical to that contained in -the `FileStatus` returned from `getFileStatus(P)`. - +1. The outcome of this operation MUST be identical to the value of + `getFileStatus(P).getBlockSize()`. +1. By inference, it MUST be > 0 for any file of length > 0. ## State Changing Operations http://git-wip-us.apache.org/repos/asf/hadoop/blob/ae8849fe/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java index 84dc775..2230fd4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java @@ -21,8 +21,8 @@ package org.apache.hadoop.fs.contract; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IOUtils; import org.junit.Test; import org.junit.internal.AssumptionViolatedException; @@ -30,16 +30,22 @@ import java.io.FileNotFoundException; import java.io.IOException; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.getFileStatusEventually; import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; /** - * Test creating files, overwrite options &c + * Test creating files, overwrite options etc. */ public abstract class AbstractContractCreateTest extends AbstractFSContractTestBase { + /** + * How long to wait for a path to become visible. + */ + public static final int CREATE_TIMEOUT = 15000; + @Test public void testCreateNewFile() throws Throwable { describe("Foundational 'create a file' test"); @@ -180,4 +186,90 @@ public abstract class AbstractContractCreateTest extends } } } + + @Test + public void testCreatedFileIsVisibleOnFlush() throws Throwable { + describe("verify that a newly created file exists once a flush has taken " + + "place"); + Path path = path("testCreatedFileIsVisibleOnFlush"); + FileSystem fs = getFileSystem(); + try(FSDataOutputStream out = fs.create(path, + false, + 4096, + (short) 1, + 1024)) { + out.write('a'); + out.flush(); + if (!fs.exists(path)) { + + if (isSupported(IS_BLOBSTORE)) { + // object store: downgrade to a skip so that the failure is visible + // in test results + skip("Filesystem is an object store and newly created files are not " + + "immediately visible"); + } + assertPathExists("expected path to be visible before file closed", + path); + } + } + } + + @Test + public void testCreatedFileIsEventuallyVisible() throws Throwable { + describe("verify a written to file is visible after the stream is closed"); + Path path = path("testCreatedFileIsEventuallyVisible"); + FileSystem fs = getFileSystem(); + try( + FSDataOutputStream out = fs.create(path, + false, + 4096, + (short) 1, + 1024) + ) { + out.write(0x01); + out.close(); + getFileStatusEventually(fs, path, CREATE_TIMEOUT); + } + } + + @Test + public void testFileStatusBlocksizeNonEmptyFile() throws Throwable { + describe("validate the block size of a filesystem and files within it"); + FileSystem fs = getFileSystem(); + + long rootPath = fs.getDefaultBlockSize(path("/")); + assertTrue("Root block size is invalid " + rootPath, + rootPath > 0); + + Path path = path("testFileStatusBlocksizeNonEmptyFile"); + byte[] data = dataset(256, 'a', 'z'); + + writeDataset(fs, path, data, data.length, 1024 * 1024, false); + + validateBlockSize(fs, path, 1); + } + + @Test + public void testFileStatusBlocksizeEmptyFile() throws Throwable { + describe("check that an empty file may return a 0-byte blocksize"); + FileSystem fs = getFileSystem(); + Path path = path("testFileStatusBlocksizeEmptyFile"); + ContractTestUtils.touch(fs, path); + validateBlockSize(fs, path, 0); + } + + private void validateBlockSize(FileSystem fs, Path path, int minValue) + throws IOException, InterruptedException { + FileStatus status = + getFileStatusEventually(fs, path, CREATE_TIMEOUT); + String statusDetails = status.toString(); + assertTrue("File status block size too low: " + statusDetails + + " min value: " + minValue, + status.getBlockSize() >= minValue); + long defaultBlockSize = fs.getDefaultBlockSize(path); + assertTrue("fs.getDefaultBlockSize(" + path + ") size " + + defaultBlockSize + " is below the minimum of " + minValue, + defaultBlockSize >= minValue); + } + } http://git-wip-us.apache.org/repos/asf/hadoop/blob/ae8849fe/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java index f6b6389..90ae974 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java @@ -1098,6 +1098,36 @@ public class ContractTestUtils extends Assert { } /** + * Get the status of a path eventually, even if the FS doesn't have create + * consistency. If the path is not there by the time the timeout completes, + * an assertion is raised. + * @param fs FileSystem + * @param path path to look for + * @param timeout timeout in milliseconds + * @return the status + * @throws IOException if an I/O error occurs while writing or reading the + * test file other than file not found + */ + public static FileStatus getFileStatusEventually(FileSystem fs, Path path, + int timeout) throws IOException, InterruptedException { + long endTime = System.currentTimeMillis() + timeout; + FileStatus stat = null; + do { + try { + stat = fs.getFileStatus(path); + } catch (FileNotFoundException e) { + if (System.currentTimeMillis() > endTime) { + // timeout, raise an assert with more diagnostics + assertPathExists(fs, "Path not found after " + timeout + " mS", path); + } else { + Thread.sleep(50); + } + } + } while (stat == null); + return stat; + } + + /** * Recursively list all entries, with a depth first traversal of the * directory tree. * @param path path @@ -1471,4 +1501,5 @@ public class ContractTestUtils extends Assert { return endTime; } } + } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org