Return-Path: X-Original-To: apmail-hadoop-common-commits-archive@www.apache.org Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 3CD6A178FA for ; Mon, 3 Nov 2014 23:13:32 +0000 (UTC) Received: (qmail 72604 invoked by uid 500); 3 Nov 2014 23:13:32 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 72535 invoked by uid 500); 3 Nov 2014 23:13:32 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 72526 invoked by uid 99); 3 Nov 2014 23:13:31 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 03 Nov 2014 23:13:31 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id AF297A06523; Mon, 3 Nov 2014 23:13:31 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wheat9@apache.org To: common-commits@hadoop.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: git commit: HDFS-7147. Update archival storage user documentation. Contributed by Tsz Wo Nicholas Sze. Date: Mon, 3 Nov 2014 23:13:31 +0000 (UTC) Repository: hadoop Updated Branches: refs/heads/branch-2.6 f2ef8c7b4 -> e94a044d1 HDFS-7147. Update archival storage user documentation. Contributed by Tsz Wo Nicholas Sze. 
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e94a044d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e94a044d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e94a044d Branch: refs/heads/branch-2.6 Commit: e94a044d180da2bd027d202cb43bf46a221177c4 Parents: f2ef8c7 Author: Haohui Mai Authored: Mon Nov 3 15:10:22 2014 -0800 Committer: Haohui Mai Committed: Mon Nov 3 15:13:19 2014 -0800 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../BlockStoragePolicySuite.java | 6 +- .../resources/blockStoragePolicy-default.xml | 118 ----------- .../src/site/apt/ArchivalStorage.apt.vm | 209 +++++++------------ hadoop-project/src/site/site.xml | 2 +- 5 files changed, 79 insertions(+), 259 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e94a044d/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 98a8aaf..f2f83de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -711,6 +711,9 @@ Release 2.6.0 - UNRELEASED HDFS-7291. Persist in-memory replicas with appropriate unbuffered copy API on POSIX and Windows. (Xiaoyu Yao via cnauroth) + HDFS-7147. Update archival storage user documentation. + (Tsz Wo Nicholas Sze via wheat9) + BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS HDFS-6387. 
HDFS CLI admin tool for creating & deleting an http://git-wip-us.apache.org/repos/asf/hadoop/blob/e94a044d/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java index c9b74bd..b0f18bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; + import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.XAttrHelper; @@ -104,9 +106,11 @@ public class BlockStoragePolicySuite { } public BlockStoragePolicy getPolicy(String policyName) { + Preconditions.checkNotNull(policyName); + if (policies != null) { for (BlockStoragePolicy policy : policies) { - if (policy != null && policy.getName().equals(policyName)) { + if (policy != null && policy.getName().equalsIgnoreCase(policyName)) { return policy; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/e94a044d/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml deleted file mode 100644 index 
891909b..0000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml +++ /dev/null @@ -1,118 +0,0 @@ - - - - - - - - - - - - dfs.block.storage.policies - HOT:12, WARM:8, COLD:4 - - A list of block storage policy names and IDs. The syntax is - - NAME_1:ID_1, NAME_2:ID_2, ..., NAME_n:ID_n - - where ID is an integer in the range [1,15] and NAME is case insensitive. - The first element is the default policy. Empty list is not allowed. - - - - - - dfs.block.storage.policy.12 - DISK - - A list of storage types for storing the block replicas such as - - STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n - - When creating a block, the i-th replica is stored using i-th storage type - for i less than or equal to n, and - the j-th replica is stored using n-th storage type for j greater than n. - - Empty list is not allowed. - - Examples: - DISK : all replicas stored using DISK. - DISK, ARCHIVE : the first replica is stored using DISK and all the - remaining replicas are stored using ARCHIVE. - - - - - dfs.block.storage.policy.creation-fallback.12 - - - A list of storage types for creation fallback storage. - - STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n - - When creating a block, if a particular storage type specified in the policy - is unavailable, the fallback STORAGE_TYPE_1 is used. Further, if - STORAGE_TYPE_i is also unavailable, the fallback STORAGE_TYPE_(i+1) is used. - In case that all fallback storages are unavailabe, the block will be created - with number of replicas less than the specified replication factor. - - An empty list indicates that there is no fallback storage. - - - - - dfs.block.storage.policy.replication-fallback.12 - ARCHIVE - - Similar to dfs.block.storage.policy.creation-fallback.x but for replication. 
- - - - - - dfs.block.storage.policy.8 - DISK, ARCHIVE - - - - dfs.block.storage.policy.creation-fallback.8 - DISK, ARCHIVE - - - - dfs.block.storage.policy.replication-fallback.8 - DISK, ARCHIVE - - - - - dfs.block.storage.policy.4 - ARCHIVE - - - - dfs.block.storage.policy.creation-fallback.4 - - - - - dfs.block.storage.policy.replication-fallback.4 - - - http://git-wip-us.apache.org/repos/asf/hadoop/blob/e94a044d/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm index 5301d52..69674c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm @@ -11,12 +11,12 @@ ~~ limitations under the License. See accompanying LICENSE file. --- - HDFS Archival Storage + Archival Storage, SSD & Memory --- --- ${maven.build.timestamp} -HDFS Archival Storage +Archival Storage, SSD & Memory %{toc|section=1|fromDepth=0} @@ -29,9 +29,13 @@ HDFS Archival Storage Adding more nodes to the cold storage can grow the storage independent of the compute capacity in the cluster. + The frameworks provided by Heterogeneous Storage and Archival Storage generalize the HDFS architecture + to include other kinds of storage media including <SSD> and <memory>. + Users may choose to store their data in SSD or memory for better performance. + * {Storage Types and Storage Policies} -** {Storage Types: DISK, SSD and ARCHIVE} +** {Storage Types: ARCHIVE, DISK, SSD and RAM_DISK} The first phase of {{{https://issues.apache.org/jira/browse/HDFS-2832}Heterogeneous Storage (HDFS-2832)}} @@ -45,7 +49,9 @@ HDFS Archival Storage which has high storage density (petabyte of storage) but little compute power, is added for supporting archival storage. 
-** {Storage Policies: Hot, Warm and Cold} + Another new storage type is added for supporting writing single replica files in memory. + +** {Storage Policies: Hot, Warm, Cold, All_SSD, One_SSD and Lazy_Persist} A new concept of storage policies is introduced in order to allow files to be stored in different storage types according to the storage policy. @@ -65,6 +71,14 @@ HDFS Archival Storage When a block is warm, some of its replicas are stored in DISK and the remaining replicas are stored in ARCHIVE. + * <> - for storing all replicas in SSD. + + * <> - for storing one of the replicas in SSD. + The remaining replicas are stored in DISK. + + * <> - for writing blocks with single replica in memory. + The replica is first written in RAM_DISK and then it is lazily persisted in DISK. + [] More formally, a storage policy consists of the following fields: @@ -89,149 +103,54 @@ HDFS Archival Storage The following is a typical storage policy table. -*--------+---------------+-------------------------+-----------------------+-----------------------+ -| <> | <>| <> | <> | <> | -| <> | <> | <<(n\ replicas)>> | <> | <> | -*--------+---------------+-------------------------+-----------------------+-----------------------+ -| 12 | Hot (default) | DISK: | \ | ARCHIVE | -*--------+---------------+-------------------------+-----------------------+-----------------------+ -| 8 | Warm | DISK: 1, ARCHIVE: -1 | ARCHIVE, DISK | ARCHIVE, DISK | -*--------+---------------+-------------------------+-----------------------+-----------------------+ -| 4 | Cold | ARCHIVE: | \ | \ | -*--------+---------------+-------------------------+-----------------------+-----------------------+ - - Note that cluster administrators may change the storage policy table - according to the characteristic of the cluster. - For example, in order to prevent losing archival data, - administrators may want to use DISK as fallback storage for replication in the Cold policy. 
- A drawback of such setting is that the DISK storages could be filled up with archival data. - As a result, the entire cluster may become full and cannot serve hot data anymore. - -** {Configurations} - -*** {Setting The List of All Storage Policies} - - * <> - - a list of block storage policy names and IDs. - The syntax is - - NAME_1:ID_1, NAME_2:ID_2, ..., NAME_:ID_ - - where ID is an integer in the closed range [1,15] and NAME is case insensitive. - The first element is the . Empty list is not allowed. - - The default value is shown below. - -+------------------------------------------+ - - dfs.block.storage.policies - HOT:12, WARM:8, COLD:4 - -+------------------------------------------+ +*--------+---------------+--------------------------+-----------------------+-----------------------+ +| <> | <>| <> | <> | <> | +| <> | <> | <<(n\ replicas)>> | <> | <> | +*--------+---------------+--------------------------+-----------------------+-----------------------+ +| 15 | Lazy_Persist | RAM_DISK: 1, DISK: -1 | DISK | DISK | +*--------+---------------+--------------------------+-----------------------+-----------------------+ +| 12 | All_SSD | SSD: | DISK | DISK | +*--------+---------------+--------------------------+-----------------------+-----------------------+ +| 10 | One_SSD | SSD: 1, DISK: -1 | SSD, DISK | SSD, DISK | +*--------+---------------+--------------------------+-----------------------+-----------------------+ +| 7 | Hot (default) | DISK: | \ | ARCHIVE | +*--------+---------------+--------------------------+-----------------------+-----------------------+ +| 5 | Warm | DISK: 1, ARCHIVE: -1 | ARCHIVE, DISK | ARCHIVE, DISK | +*--------+---------------+--------------------------+-----------------------+-----------------------+ +| 2 | Cold | ARCHIVE: | \ | \ | +*--------+---------------+--------------------------+-----------------------+-----------------------+ + + Note that the Lazy_Persist policy is useful only for single replica blocks. 
+ For blocks with more than one replica, all the replicas will be written to DISK + since writing only one of the replicas to RAM_DISK does not improve the overall performance. + +** {Storage Policy Resolution} + + When a file or directory is created, its storage policy is <unspecified>. + The storage policy can be specified using + the "<<<{{{Set Storage Policy}dfsadmin -setStoragePolicy}}>>>" command. + The effective storage policy of a file or directory is resolved by the following rules. + + [[1]] If the file or directory is specified with a storage policy, return it. + + [[2]] For an unspecified file or directory, + if it is the root directory, return the <default storage policy>. + Otherwise, return its parent's effective storage policy. [] -*** {Setting Storage Policy Details} - - The following configuration properties are for setting the details of each storage policy, - where <<<\>>> is the actual policy ID. - - * <>> - - a list of storage types for storing the block replicas. - The syntax is - - STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_ - - When creating a block, the -th replica is stored using -th storage type - for less than or equal to , and - the -th replica is stored using -th storage type for greater than . - - Empty list is not allowed. - - Examples: - -+------------------------------------------+ -DISK : all replicas stored using DISK. -DISK, ARCHIVE : the first replica is stored using DISK and all the - remaining replicas are stored using ARCHIVE. -+------------------------------------------+ - - * <>> - - a list of storage types for creation fallback storage. - The syntax is + The effective storage policy can be retrieved by + the "<<<{{{Set Storage Policy}dfsadmin -getStoragePolicy}}>>>" command. - STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n - - When creating a block, if a particular storage type specified in the policy - is unavailable, the fallback STORAGE_TYPE_1 is used. 
Further, if - STORAGE_TYPE_ is also unavailable, the fallback STORAGE_TYPE_<(i+1)> is used. - In case all fallback storages are unavailable, the block will be created - with number of replicas less than the specified replication factor. - An empty list indicates that there is no fallback storage. +** {Configuration} - * <>> - - a list of storage types for replication fallback storage. - The usage of this configuration property is similar to - <<>>> - except that it takes effect on replication but not block creation. + * <> + - for enabling/disabling the storage policy feature. + The default value is <<>>. [] - The following are the default configuration values for Hot, Warm and Cold storage policies. - - * Block Storage Policy <> - -+------------------------------------------+ - - dfs.block.storage.policy.12 - DISK - - - dfs.block.storage.policy.creation-fallback.12 - - - - dfs.block.storage.policy.replication-fallback.12 - ARCHIVE - -+------------------------------------------+ - - * Block Storage Policy <> - -+------------------------------------------+ - - dfs.block.storage.policy.8 - DISK, ARCHIVE - - - dfs.block.storage.policy.creation-fallback.8 - DISK, ARCHIVE - - - dfs.block.storage.policy.replication-fallback.8 - DISK, ARCHIVE - -+------------------------------------------+ - - * Block Storage Policy <> - -+------------------------------------------+ - - dfs.block.storage.policy.4 - ARCHIVE - - - dfs.block.storage.policy.creation-fallback.4 - - - - dfs.block.storage.policy.replication-fallback.4 - - -+------------------------------------------+ - - [] * {Mover - A New Data Migration Tool} @@ -261,7 +180,19 @@ hdfs mover [-p | -f ] [] -* {<<>> Commands} +* {Storage Policy Commands} + +** {List Storage Policies} + + List out all the storage policies. + + * Command: + ++------------------------------------------+ +hdfs storagepolicies ++------------------------------------------+ + + * Arguments: none. 
** {Set Storage Policy} http://git-wip-us.apache.org/repos/asf/hadoop/blob/e94a044d/hadoop-project/src/site/site.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 991447f..2fd1532 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -93,7 +93,7 @@ - +