hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From psomo...@apache.org
Subject hbase git commit: HBASE-20592 Create a tool to verify tables do not have prefix tree encoding
Date Fri, 01 Jun 2018 17:18:17 GMT
Repository: hbase
Updated Branches:
  refs/heads/master d5ea92632 -> 096866828


HBASE-20592 Create a tool to verify tables do not have prefix tree encoding

Signed-off-by: Mike Drob <mdrob@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/09686682
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/09686682
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/09686682

Branch: refs/heads/master
Commit: 0968668283d9e3b23c2da8c2c4a0a77caee2e9af
Parents: d5ea926
Author: Peter Somogyi <psomogyi@apache.org>
Authored: Fri May 25 15:03:17 2018 +0200
Committer: Peter Somogyi <psomogyi@apache.org>
Committed: Fri Jun 1 19:17:49 2018 +0200

----------------------------------------------------------------------
 bin/hbase                                       |   3 +
 .../hadoop/hbase/tool/PreUpgradeValidator.java  | 129 +++++++++++++++++++
 .../_chapters/appendix_hfile_format.adoc        |   2 +-
 src/main/asciidoc/_chapters/compression.adoc    |   2 +-
 src/main/asciidoc/_chapters/ops_mgt.adoc        |  22 ++++
 src/main/asciidoc/_chapters/upgrading.adoc      |   4 +
 6 files changed, 160 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/09686682/bin/hbase
----------------------------------------------------------------------
diff --git a/bin/hbase b/bin/hbase
index f1e2306..4f1c854 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -108,6 +108,7 @@ if [ $# = 0 ]; then
   echo "  regionsplitter  Run RegionSplitter tool"
   echo "  rowcounter      Run RowCounter tool"
   echo "  cellcounter     Run CellCounter tool"
+  echo "  pre-upgrade     Run Pre-Upgrade validator tool"
   echo "  CLASSNAME       Run the class named CLASSNAME"
   exit 1
 fi
@@ -471,6 +472,8 @@ elif [ "$COMMAND" = "rowcounter" ] ; then
   CLASS='org.apache.hadoop.hbase.mapreduce.RowCounter'
 elif [ "$COMMAND" = "cellcounter" ] ; then
   CLASS='org.apache.hadoop.hbase.mapreduce.CellCounter'
+elif [ "$COMMAND" = "pre-upgrade" ] ; then
+  CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator'
 else
   CLASS=$COMMAND
 fi

http://git-wip-us.apache.org/repos/asf/hbase/blob/09686682/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
new file mode 100644
index 0000000..138af6a
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
@@ -0,0 +1,129 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.tool;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+
+/**
+ * Tool for validating that cluster can be upgraded from HBase 1.x to 2.0
+ * <p>
+ * Available validations:
+ * </p>
+ * <ul>
+ * <li>all: Run all pre-upgrade validations</li>
+ * <li>validateDBE: Check Data Block Encoding for column families</li>
+ * </ul>
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class PreUpgradeValidator extends AbstractHBaseTool {
+
+  public static final String NAME = "pre-upgrade";
+  private static final Logger LOG = LoggerFactory.getLogger(PreUpgradeValidator.class);
+  /** Column family attribute key under which the data block encoding name is looked up. */
+  private static final byte[] DATA_BLOCK_ENCODING = Bytes.toBytes("DATA_BLOCK_ENCODING");
+  /** Set when the {@code all} option was passed on the command line. */
+  private boolean validateAll;
+  /** Set when the {@code validateDBE} option was passed on the command line. */
+  private boolean validateDBE;
+
+  /**
+   * Check DataBlockEncodings for column families.
+   * <p>
+   * Every column family of every table returned by the admin API is inspected; each family whose
+   * encoding name is not a known {@link DataBlockEncoding} constant is logged and counted.
+   * </p>
+   *
+   * @return {@code true} if no incompatible DataBlockEncoding was found, {@code false} otherwise
+   * @throws IOException if a remote or network exception occurs
+   */
+  private boolean validateDBE() throws IOException {
+    int incompatibilities = 0;
+
+    LOG.info("Validating Data Block Encodings");
+
+    try (Connection connection = ConnectionFactory.createConnection(getConf());
+        Admin admin = connection.getAdmin()) {
+      List<TableDescriptor> tableDescriptors = admin.listTableDescriptors();
+
+      for (TableDescriptor td : tableDescriptors) {
+        for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
+          String encoding = Bytes.toString(cfd.getValue(DATA_BLOCK_ENCODING));
+          if (encoding == null) {
+            // valueOf(null) would throw NullPointerException, which the catch below does not
+            // handle. NOTE(review): assumes an unset attribute means no incompatible encoding
+            // is configured for this family -- confirm.
+            continue;
+          }
+          try {
+            // IllegalArgumentException will be thrown if encoding is incompatible with 2.0
+            DataBlockEncoding.valueOf(encoding);
+          } catch (IllegalArgumentException e) {
+            incompatibilities++;
+            LOG.warn("Incompatible DataBlockEncoding for table: {}, cf: {}, encoding: {}",
+                td.getTableName().getNameAsString(), cfd.getNameAsString(), encoding);
+          }
+        }
+      }
+    }
+
+    if (incompatibilities > 0) {
+      LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not "
+          + "upgrade until these encodings are converted to a supported one.", incompatibilities);
+      LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed "
+          + "for instructions.");
+      return false;
+    } else {
+      LOG.info("The used Data Block Encodings are compatible with HBase 2.0.");
+      return true;
+    }
+  }
+
+  /** Registers the argument-less {@code all} and {@code validateDBE} command line options. */
+  @Override
+  protected void addOptions() {
+    addOptNoArg("all", "Run all pre-upgrade validations");
+    addOptNoArg("validateDBE", "Validate DataBlockEncoding are compatible on the cluster");
+  }
+
+  /**
+   * Records which validations were requested.
+   *
+   * @param cmd the parsed command line
+   */
+  @Override
+  protected void processOptions(CommandLine cmd) {
+    validateAll = cmd.hasOption("all");
+    validateDBE = cmd.hasOption("validateDBE");
+  }
+
+  /**
+   * Runs the requested validations.
+   *
+   * @return 0 if every executed validation passed (or none was requested), 1 if any failed
+   * @throws Exception if a validation could not be carried out
+   */
+  @Override
+  protected int doWork() throws Exception {
+    boolean validationFailed = false;
+    if (validateDBE || validateAll) {
+      // validateDBE() returns true when the encodings are compatible, so only a false result
+      // marks the run as failed.
+      if (!validateDBE()) {
+        validationFailed = true;
+      }
+    }
+
+    return validationFailed ? 1 : 0;
+  }
+
+  public static void main(String[] args) {
+    new PreUpgradeValidator().doStaticMain(args);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/09686682/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
index 20f46d3..0f37beb 100644
--- a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
+++ b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
@@ -321,7 +321,7 @@ Version 3 added two additional pieces of information to the reserved keys
in the
 When reading a Version 3 HFile the presence of `MAX_TAGS_LEN` is used to determine how to
deserialize the cells within a data block.
 Therefore, consumers must read the file's info block prior to reading any data blocks.
 
-When writing a Version 3 HFile, HBase will always include `MAX_TAGS_LEN ` when flushing the
memstore to underlying filesystem and when using prefix tree encoding for data blocks, as
described in <<compression,compression>>.
+When writing a Version 3 HFile, HBase will always include `MAX_TAGS_LEN` when flushing the
memstore to underlying filesystem.
 
 When compacting extant files, the default writer will omit `MAX_TAGS_LEN` if all of the files
selected do not themselves contain any cells with tags.
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/09686682/src/main/asciidoc/_chapters/compression.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/compression.adoc b/src/main/asciidoc/_chapters/compression.adoc
index c89e305..b2ff5ce 100644
--- a/src/main/asciidoc/_chapters/compression.adoc
+++ b/src/main/asciidoc/_chapters/compression.adoc
@@ -125,7 +125,7 @@ The compression or codec type to use depends on the characteristics of
your data
 In general, you need to weigh your options between smaller size and faster compression/decompression.
Following are some general guidelines, expanded from a discussion at link:http://search-hadoop.com/m/lL12B1PFVhp1[Documenting
Guidance on compression and codecs].
 
 * If you have long keys (compared to the values) or many columns, use a prefix encoder.
-  FAST_DIFF is recommended, as more testing is needed for Prefix Tree encoding.
+  FAST_DIFF is recommended.
 * If the values are large (and not precompressed, such as images), use a data block compressor.
 * Use GZIP for [firstterm]_cold data_, which is accessed infrequently.
   GZIP compression uses more CPU resources than Snappy or LZO, but provides a higher compression
ratio.

http://git-wip-us.apache.org/repos/asf/hbase/blob/09686682/src/main/asciidoc/_chapters/ops_mgt.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 11a0320..6fef714 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -842,6 +842,28 @@ For general usage instructions, pass the `-h` option.
 
 The LoadTestTool has received many updates in recent HBase releases, including support for
namespaces, support for tags, cell-level ACLS and visibility labels, testing security-related
features, ability to specify the number of regions per server, tests for multi-get RPC calls,
and tests relating to replication.
 
+[[ops.pre-upgrade]]
+=== Pre-Upgrade validator
+The Pre-Upgrade validator tool can be used to check the cluster for known incompatibilities before upgrading from HBase 1 to HBase 2.
+To run all the checks use the `-all` flag.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade -all
+----
+
+==== DataBlockEncoding validation
+HBase 2.0 removed `PREFIX_TREE` Data Block Encoding from column families.
+To verify that none of the column families in the cluster are using incompatible Data Block Encodings, run the following command.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade -validateDBE
+----
+
+This check validates all column families and prints out any incompatibilities.
+To change `PREFIX_TREE` encoding to a supported one, see <<upgrade2.0.prefix-tree.removed,_prefix-tree_ encoding removed>>.
+
 [[ops.regionmgt]]
 == Region Management
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/09686682/src/main/asciidoc/_chapters/upgrading.adoc
----------------------------------------------------------------------
diff --git a/src/main/asciidoc/_chapters/upgrading.adoc b/src/main/asciidoc/_chapters/upgrading.adoc
index 6f0405d..a126422 100644
--- a/src/main/asciidoc/_chapters/upgrading.adoc
+++ b/src/main/asciidoc/_chapters/upgrading.adoc
@@ -427,6 +427,10 @@ This feature was removed because it as not being actively maintained.
If interes
 sweet facility which improved random read latencies at the expensive of slowed writes,
 write the HBase developers list at _dev at hbase dot apache dot org_.
 
+The prefix-tree encoding needs to be removed from all tables before upgrading to HBase 2.0+.
+To do that, first change the encoding from PREFIX_TREE to something else that is supported in HBase 2.0.
+After that, major compact the tables that were using PREFIX_TREE encoding.
+To check which column families are using an incompatible data block encoding, you can use the <<ops.pre-upgrade,Pre-Upgrade Validator>>.
 
 [[upgrade2.0.metrics]]
 .Changed metrics


Mime
View raw message