hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject [01/35] hbase-site git commit: Published site at 9250bf809155ebe93fd6ae8a0485b22c744fdf70.
Date Tue, 15 Nov 2016 05:45:52 GMT
Repository: hbase-site
Updated Branches:
  refs/heads/asf-site dbfeb6d66 -> 36e5b7d69


http://git-wip-us.apache.org/repos/asf/hbase-site/blob/f17356a7/xref/org/apache/hadoop/hbase/util/RegionSplitter.html
----------------------------------------------------------------------
diff --git a/xref/org/apache/hadoop/hbase/util/RegionSplitter.html b/xref/org/apache/hadoop/hbase/util/RegionSplitter.html
index 5cae107..c705d7f 100644
--- a/xref/org/apache/hadoop/hbase/util/RegionSplitter.html
+++ b/xref/org/apache/hadoop/hbase/util/RegionSplitter.html
@@ -30,1103 +30,1096 @@
 <a class="jxr_linenumber" name="20" href="#20">20</a>  
 <a class="jxr_linenumber" name="21" href="#21">21</a>  <strong class="jxr_keyword">import</strong> java.io.IOException;
 <a class="jxr_linenumber" name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> java.math.BigInteger;
-<a class="jxr_linenumber" name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> java.util.Arrays;
-<a class="jxr_linenumber" name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> java.util.Collection;
-<a class="jxr_linenumber" name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> java.util.Collections;
-<a class="jxr_linenumber" name="26" href="#26">26</a>  <strong class="jxr_keyword">import</strong> java.util.Comparator;
-<a class="jxr_linenumber" name="27" href="#27">27</a>  <strong class="jxr_keyword">import</strong> java.util.LinkedList;
-<a class="jxr_linenumber" name="28" href="#28">28</a>  <strong class="jxr_keyword">import</strong> java.util.List;
+<a class="jxr_linenumber" name="23" href="#23">23</a>
+<a class="jxr_linenumber" name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> java.util.Arrays;
+<a class="jxr_linenumber" name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> java.util.Collection;
+<a class="jxr_linenumber" name="26" href="#26">26</a>  <strong class="jxr_keyword">import</strong> java.util.LinkedList;
+<a class="jxr_linenumber" name="27" href="#27">27</a>  <strong class="jxr_keyword">import</strong> java.util.List;
+<a class="jxr_linenumber" name="28" href="#28">28</a>  <strong class="jxr_keyword">import</strong> java.util.Map;
 <a class="jxr_linenumber" name="29" href="#29">29</a>  <strong class="jxr_keyword">import</strong> java.util.Set;
 <a class="jxr_linenumber" name="30" href="#30">30</a>  <strong class="jxr_keyword">import</strong> java.util.TreeMap;
-<a class="jxr_linenumber" name="31" href="#31">31</a>  
-<a class="jxr_linenumber" name="32" href="#32">32</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.CommandLine;
-<a class="jxr_linenumber" name="33" href="#33">33</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.GnuParser;
-<a class="jxr_linenumber" name="34" href="#34">34</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.HelpFormatter;
-<a class="jxr_linenumber" name="35" href="#35">35</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.OptionBuilder;
-<a class="jxr_linenumber" name="36" href="#36">36</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.Options;
-<a class="jxr_linenumber" name="37" href="#37">37</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.ParseException;
-<a class="jxr_linenumber" name="38" href="#38">38</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.lang.ArrayUtils;
-<a class="jxr_linenumber" name="39" href="#39">39</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.lang.StringUtils;
-<a class="jxr_linenumber" name="40" href="#40">40</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.logging.Log;
-<a class="jxr_linenumber" name="41" href="#41">41</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.logging.LogFactory;
-<a class="jxr_linenumber" name="42" href="#42">42</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.conf.Configuration;
-<a class="jxr_linenumber" name="43" href="#43">43</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataInputStream;
-<a class="jxr_linenumber" name="44" href="#44">44</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataOutputStream;
-<a class="jxr_linenumber" name="45" href="#45">45</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FileSystem;
-<a class="jxr_linenumber" name="46" href="#46">46</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.Path;
-<a class="jxr_linenumber" name="47" href="#47">47</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ClusterStatus;
-<a class="jxr_linenumber" name="48" href="#48">48</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HBaseConfiguration;
-<a class="jxr_linenumber" name="49" href="#49">49</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HColumnDescriptor;
-<a class="jxr_linenumber" name="50" href="#50">50</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionInfo;
-<a class="jxr_linenumber" name="51" href="#51">51</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionLocation;
-<a class="jxr_linenumber" name="52" href="#52">52</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HTableDescriptor;
-<a class="jxr_linenumber" name="53" href="#53">53</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.MetaTableAccessor;
-<a class="jxr_linenumber" name="54" href="#54">54</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ServerName;
-<a class="jxr_linenumber" name="55" href="#55">55</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.TableName;
-<a class="jxr_linenumber" name="56" href="#56">56</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.classification.InterfaceAudience;
-<a class="jxr_linenumber" name="57" href="#57">57</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Admin;
-<a class="jxr_linenumber" name="58" href="#58">58</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ClusterConnection;
-<a class="jxr_linenumber" name="59" href="#59">59</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Connection;
-<a class="jxr_linenumber" name="60" href="#60">60</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ConnectionFactory;
-<a class="jxr_linenumber" name="61" href="#61">61</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.NoServerForRegionException;
-<a class="jxr_linenumber" name="62" href="#62">62</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.RegionLocator;
-<a class="jxr_linenumber" name="63" href="#63">63</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Table;
-<a class="jxr_linenumber" name="64" href="#64">64</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
-<a class="jxr_linenumber" name="65" href="#65">65</a>  
-<a class="jxr_linenumber" name="66" href="#66">66</a>  <strong class="jxr_keyword">import</strong> com.google.common.base.Preconditions;
-<a class="jxr_linenumber" name="67" href="#67">67</a>  <strong class="jxr_keyword">import</strong> com.google.common.collect.Lists;
-<a class="jxr_linenumber" name="68" href="#68">68</a>  <strong class="jxr_keyword">import</strong> com.google.common.collect.Maps;
-<a class="jxr_linenumber" name="69" href="#69">69</a>  <strong class="jxr_keyword">import</strong> com.google.common.collect.Sets;
-<a class="jxr_linenumber" name="70" href="#70">70</a>  
-<a class="jxr_linenumber" name="71" href="#71">71</a>  <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="72" href="#72">72</a>  <em class="jxr_javadoccomment"> * The {@link RegionSplitter} class provides several utilities to help in the</em>
-<a class="jxr_linenumber" name="73" href="#73">73</a>  <em class="jxr_javadoccomment"> * administration lifecycle for developers who choose to manually split regions</em>
-<a class="jxr_linenumber" name="74" href="#74">74</a>  <em class="jxr_javadoccomment"> * instead of having HBase handle that automatically. The most useful utilities</em>
-<a class="jxr_linenumber" name="75" href="#75">75</a>  <em class="jxr_javadoccomment"> * are:</em>
-<a class="jxr_linenumber" name="76" href="#76">76</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="77" href="#77">77</a>  <em class="jxr_javadoccomment"> * &lt;ul&gt;</em>
-<a class="jxr_linenumber" name="78" href="#78">78</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Create a table with a specified number of pre-split regions</em>
-<a class="jxr_linenumber" name="79" href="#79">79</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Execute a rolling split of all regions on an existing table</em>
-<a class="jxr_linenumber" name="80" href="#80">80</a>  <em class="jxr_javadoccomment"> * &lt;/ul&gt;</em>
-<a class="jxr_linenumber" name="81" href="#81">81</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="82" href="#82">82</a>  <em class="jxr_javadoccomment"> * Both operations can be safely done on a live server.</em>
-<a class="jxr_linenumber" name="83" href="#83">83</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="84" href="#84">84</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; How do I turn off automatic splitting? &lt;br&gt;</em>
-<a class="jxr_linenumber" name="85" href="#85">85</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Automatic splitting is determined by the configuration value</em>
-<a class="jxr_linenumber" name="86" href="#86">86</a>  <em class="jxr_javadoccomment"> * &lt;i&gt;HConstants.HREGION_MAX_FILESIZE&lt;/i&gt;. It is not recommended that you set this</em>
-<a class="jxr_linenumber" name="87" href="#87">87</a>  <em class="jxr_javadoccomment"> * to Long.MAX_VALUE in case you forget about manual splits. A suggested setting</em>
-<a class="jxr_linenumber" name="88" href="#88">88</a>  <em class="jxr_javadoccomment"> * is 100GB, which would result in &amp;gt; 1hr major compactions if reached.</em>
-<a class="jxr_linenumber" name="89" href="#89">89</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="90" href="#90">90</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; Why did the original authors decide to manually split? &lt;br&gt;</em>
-<a class="jxr_linenumber" name="91" href="#91">91</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Specific workload characteristics of our use case allowed us</em>
-<a class="jxr_linenumber" name="92" href="#92">92</a>  <em class="jxr_javadoccomment"> * to benefit from a manual split system.</em>
-<a class="jxr_linenumber" name="93" href="#93">93</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="94" href="#94">94</a>  <em class="jxr_javadoccomment"> * &lt;ul&gt;</em>
-<a class="jxr_linenumber" name="95" href="#95">95</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Data (~1k) that would grow instead of being replaced</em>
-<a class="jxr_linenumber" name="96" href="#96">96</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Data growth was roughly uniform across all regions</em>
-<a class="jxr_linenumber" name="97" href="#97">97</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;OLTP workload. Data loss is a big deal.</em>
-<a class="jxr_linenumber" name="98" href="#98">98</a>  <em class="jxr_javadoccomment"> * &lt;/ul&gt;</em>
-<a class="jxr_linenumber" name="99" href="#99">99</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="100" href="#100">100</a> <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; Why is manual splitting good for this workload? &lt;br&gt;</em>
-<a class="jxr_linenumber" name="101" href="#101">101</a> <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Although automated splitting is not a bad option, there are</em>
-<a class="jxr_linenumber" name="102" href="#102">102</a> <em class="jxr_javadoccomment"> * benefits to manual splitting.</em>
-<a class="jxr_linenumber" name="103" href="#103">103</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="104" href="#104">104</a> <em class="jxr_javadoccomment"> * &lt;ul&gt;</em>
-<a class="jxr_linenumber" name="105" href="#105">105</a> <em class="jxr_javadoccomment"> * &lt;li&gt;With growing amounts of data, splits will continually be needed. Since</em>
-<a class="jxr_linenumber" name="106" href="#106">106</a> <em class="jxr_javadoccomment"> * you always know exactly what regions you have, long-term debugging and</em>
-<a class="jxr_linenumber" name="107" href="#107">107</a> <em class="jxr_javadoccomment"> * profiling is much easier with manual splits. It is hard to trace the logs to</em>
-<a class="jxr_linenumber" name="108" href="#108">108</a> <em class="jxr_javadoccomment"> * understand region level problems if it keeps splitting and getting renamed.</em>
-<a class="jxr_linenumber" name="109" href="#109">109</a> <em class="jxr_javadoccomment"> * &lt;li&gt;Data offlining bugs + unknown number of split regions == oh crap! If a</em>
-<a class="jxr_linenumber" name="110" href="#110">110</a> <em class="jxr_javadoccomment"> * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and</em>
-<a class="jxr_linenumber" name="111" href="#111">111</a> <em class="jxr_javadoccomment"> * you notice it a day or so later, you can be assured that the regions</em>
-<a class="jxr_linenumber" name="112" href="#112">112</a> <em class="jxr_javadoccomment"> * specified in these files are the same as the current regions and you have</em>
-<a class="jxr_linenumber" name="113" href="#113">113</a> <em class="jxr_javadoccomment"> * fewer headaches trying to restore/replay your data.</em>
-<a class="jxr_linenumber" name="114" href="#114">114</a> <em class="jxr_javadoccomment"> * &lt;li&gt;You can finely tune your compaction algorithm. With roughly uniform data</em>
-<a class="jxr_linenumber" name="115" href="#115">115</a> <em class="jxr_javadoccomment"> * growth, it's easy to cause split / compaction storms as the regions all</em>
-<a class="jxr_linenumber" name="116" href="#116">116</a> <em class="jxr_javadoccomment"> * roughly hit the same data size at the same time. With manual splits, you can</em>
-<a class="jxr_linenumber" name="117" href="#117">117</a> <em class="jxr_javadoccomment"> * let staggered, time-based major compactions spread out your network IO load.</em>
-<a class="jxr_linenumber" name="118" href="#118">118</a> <em class="jxr_javadoccomment"> * &lt;/ul&gt;</em>
-<a class="jxr_linenumber" name="119" href="#119">119</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="120" href="#120">120</a> <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; What's the optimal number of pre-split regions to create? &lt;br&gt;</em>
-<a class="jxr_linenumber" name="121" href="#121">121</a> <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Mileage will vary depending upon your application.</em>
-<a class="jxr_linenumber" name="122" href="#122">122</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="123" href="#123">123</a> <em class="jxr_javadoccomment"> * The short answer for our application is that we started with 10 pre-split</em>
-<a class="jxr_linenumber" name="124" href="#124">124</a> <em class="jxr_javadoccomment"> * regions / server and watched our data growth over time. It's better to err on</em>
-<a class="jxr_linenumber" name="125" href="#125">125</a> <em class="jxr_javadoccomment"> * the side of too few regions and rolling split later.</em>
-<a class="jxr_linenumber" name="126" href="#126">126</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="127" href="#127">127</a> <em class="jxr_javadoccomment"> * The more complicated answer is that this depends upon the largest storefile</em>
-<a class="jxr_linenumber" name="128" href="#128">128</a> <em class="jxr_javadoccomment"> * in your region. With a growing data size, this will get larger over time. You</em>
-<a class="jxr_linenumber" name="129" href="#129">129</a> <em class="jxr_javadoccomment"> * want the largest region to be just big enough that the</em>
-<a class="jxr_linenumber" name="130" href="#130">130</a> <em class="jxr_javadoccomment"> * {@link org.apache.hadoop.hbase.regionserver.HStore} compact</em>
-<a class="jxr_linenumber" name="131" href="#131">131</a> <em class="jxr_javadoccomment"> * selection algorithm only compacts it due to a timed major. If you don't, your</em>
-<a class="jxr_linenumber" name="132" href="#132">132</a> <em class="jxr_javadoccomment"> * cluster can be prone to compaction storms as the algorithm decides to run</em>
-<a class="jxr_linenumber" name="133" href="#133">133</a> <em class="jxr_javadoccomment"> * major compactions on a large series of regions all at once. Note that</em>
-<a class="jxr_linenumber" name="134" href="#134">134</a> <em class="jxr_javadoccomment"> * compaction storms are due to the uniform data growth, not the manual split</em>
-<a class="jxr_linenumber" name="135" href="#135">135</a> <em class="jxr_javadoccomment"> * decision.</em>
-<a class="jxr_linenumber" name="136" href="#136">136</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="137" href="#137">137</a> <em class="jxr_javadoccomment"> * If you pre-split your regions too thin, you can increase the major compaction</em>
-<a class="jxr_linenumber" name="138" href="#138">138</a> <em class="jxr_javadoccomment"> * interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size</em>
-<a class="jxr_linenumber" name="139" href="#139">139</a> <em class="jxr_javadoccomment"> * grows too large, use this script to perform a network IO safe rolling split</em>
-<a class="jxr_linenumber" name="140" href="#140">140</a> <em class="jxr_javadoccomment"> * of all regions.</em>
-<a class="jxr_linenumber" name="141" href="#141">141</a> <em class="jxr_javadoccomment"> */</em>
-<a class="jxr_linenumber" name="142" href="#142">142</a> @InterfaceAudience.Private
-<a class="jxr_linenumber" name="143" href="#143">143</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">RegionSplitter</a> {
-<a class="jxr_linenumber" name="144" href="#144">144</a>   <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> Log LOG = LogFactory.getLog(RegionSplitter.<strong class="jxr_keyword">class</strong>);
-<a class="jxr_linenumber" name="145" href="#145">145</a> 
-<a class="jxr_linenumber" name="146" href="#146">146</a>   <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="147" href="#147">147</a> <em class="jxr_javadoccomment">   * A generic interface for the RegionSplitter code to use for all its</em>
-<a class="jxr_linenumber" name="148" href="#148">148</a> <em class="jxr_javadoccomment">   * functionality. Note that the original authors of this code use</em>
-<a class="jxr_linenumber" name="149" href="#149">149</a> <em class="jxr_javadoccomment">   * {@link HexStringSplit} to partition their table and set it as default, but</em>
-<a class="jxr_linenumber" name="150" href="#150">150</a> <em class="jxr_javadoccomment">   * provided this for your custom algorithm. To use, create a new derived class</em>
-<a class="jxr_linenumber" name="151" href="#151">151</a> <em class="jxr_javadoccomment">   * from this interface and call {@link RegionSplitter#createPresplitTable} or</em>
-<a class="jxr_linenumber" name="152" href="#152">152</a> <em class="jxr_javadoccomment">   * RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with the</em>
-<a class="jxr_linenumber" name="153" href="#153">153</a> <em class="jxr_javadoccomment">   * argument splitClassName giving the name of your class.</em>
-<a class="jxr_linenumber" name="154" href="#154">154</a> <em class="jxr_javadoccomment">   */</em>
-<a class="jxr_linenumber" name="155" href="#155">155</a>   <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">interface</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> {
-<a class="jxr_linenumber" name="156" href="#156">156</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="157" href="#157">157</a> <em class="jxr_javadoccomment">     * Split a pre-existing region into 2 regions.</em>
-<a class="jxr_linenumber" name="158" href="#158">158</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="159" href="#159">159</a> <em class="jxr_javadoccomment">     * @param start</em>
-<a class="jxr_linenumber" name="160" href="#160">160</a> <em class="jxr_javadoccomment">     *          first row (inclusive)</em>
-<a class="jxr_linenumber" name="161" href="#161">161</a> <em class="jxr_javadoccomment">     * @param end</em>
-<a class="jxr_linenumber" name="162" href="#162">162</a> <em class="jxr_javadoccomment">     *          last row (exclusive)</em>
-<a class="jxr_linenumber" name="163" href="#163">163</a> <em class="jxr_javadoccomment">     * @return the split row to use</em>
-<a class="jxr_linenumber" name="164" href="#164">164</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="165" href="#165">165</a>     byte[] split(byte[] start, byte[] end);
-<a class="jxr_linenumber" name="166" href="#166">166</a> 
-<a class="jxr_linenumber" name="167" href="#167">167</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="168" href="#168">168</a> <em class="jxr_javadoccomment">     * Split an entire table.</em>
-<a class="jxr_linenumber" name="169" href="#169">169</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="170" href="#170">170</a> <em class="jxr_javadoccomment">     * @param numRegions</em>
-<a class="jxr_linenumber" name="171" href="#171">171</a> <em class="jxr_javadoccomment">     *          number of regions to split the table into</em>
-<a class="jxr_linenumber" name="172" href="#172">172</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="173" href="#173">173</a> <em class="jxr_javadoccomment">     * @throws RuntimeException</em>
-<a class="jxr_linenumber" name="174" href="#174">174</a> <em class="jxr_javadoccomment">     *           user input is validated at this time. may throw a runtime</em>
-<a class="jxr_linenumber" name="175" href="#175">175</a> <em class="jxr_javadoccomment">     *           exception in response to a parse failure</em>
-<a class="jxr_linenumber" name="176" href="#176">176</a> <em class="jxr_javadoccomment">     * @return array of split keys for the initial regions of the table. The</em>
-<a class="jxr_linenumber" name="177" href="#177">177</a> <em class="jxr_javadoccomment">     *         length of the returned array should be numRegions-1.</em>
-<a class="jxr_linenumber" name="178" href="#178">178</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="179" href="#179">179</a>     byte[][] split(<strong class="jxr_keyword">int</strong> numRegions);
-<a class="jxr_linenumber" name="180" href="#180">180</a> 
-<a class="jxr_linenumber" name="181" href="#181">181</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="182" href="#182">182</a> <em class="jxr_javadoccomment">     * In HBase, the first row is represented by an empty byte array. This might</em>
-<a class="jxr_linenumber" name="183" href="#183">183</a> <em class="jxr_javadoccomment">     * cause problems with your split algorithm or row printing. All your APIs</em>
-<a class="jxr_linenumber" name="184" href="#184">184</a> <em class="jxr_javadoccomment">     * will be passed firstRow() instead of empty array.</em>
-<a class="jxr_linenumber" name="185" href="#185">185</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="186" href="#186">186</a> <em class="jxr_javadoccomment">     * @return your representation of your first row</em>
-<a class="jxr_linenumber" name="187" href="#187">187</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="188" href="#188">188</a>     byte[] firstRow();
-<a class="jxr_linenumber" name="189" href="#189">189</a> 
-<a class="jxr_linenumber" name="190" href="#190">190</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="191" href="#191">191</a> <em class="jxr_javadoccomment">     * In HBase, the last row is represented by an empty byte array. This might</em>
-<a class="jxr_linenumber" name="192" href="#192">192</a> <em class="jxr_javadoccomment">     * cause problems with your split algorithm or row printing. All your APIs</em>
-<a class="jxr_linenumber" name="193" href="#193">193</a> <em class="jxr_javadoccomment">     * will be passed lastRow() instead of empty array.</em>
-<a class="jxr_linenumber" name="194" href="#194">194</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="195" href="#195">195</a> <em class="jxr_javadoccomment">     * @return your representation of your last row</em>
-<a class="jxr_linenumber" name="196" href="#196">196</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="197" href="#197">197</a>     byte[] lastRow();
-<a class="jxr_linenumber" name="198" href="#198">198</a> 
-<a class="jxr_linenumber" name="199" href="#199">199</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="200" href="#200">200</a> <em class="jxr_javadoccomment">     * In HBase, the first row is represented by an empty byte array. Set this</em>
-<a class="jxr_linenumber" name="201" href="#201">201</a> <em class="jxr_javadoccomment">     * value to help the split code understand how to evenly divide the first</em>
-<a class="jxr_linenumber" name="202" href="#202">202</a> <em class="jxr_javadoccomment">     * region.</em>
-<a class="jxr_linenumber" name="203" href="#203">203</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="204" href="#204">204</a> <em class="jxr_javadoccomment">     * @param userInput</em>
-<a class="jxr_linenumber" name="205" href="#205">205</a> <em class="jxr_javadoccomment">     *          raw user input (may throw RuntimeException on parse failure)</em>
-<a class="jxr_linenumber" name="206" href="#206">206</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="207" href="#207">207</a>     <strong class="jxr_keyword">void</strong> setFirstRow(String userInput);
-<a class="jxr_linenumber" name="208" href="#208">208</a> 
-<a class="jxr_linenumber" name="209" href="#209">209</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="210" href="#210">210</a> <em class="jxr_javadoccomment">     * In HBase, the last row is represented by an empty byte array. Set this</em>
-<a class="jxr_linenumber" name="211" href="#211">211</a> <em class="jxr_javadoccomment">     * value to help the split code understand how to evenly divide the last</em>
-<a class="jxr_linenumber" name="212" href="#212">212</a> <em class="jxr_javadoccomment">     * region. Note that this last row is inclusive for all rows sharing the</em>
-<a class="jxr_linenumber" name="213" href="#213">213</a> <em class="jxr_javadoccomment">     * same prefix.</em>
-<a class="jxr_linenumber" name="214" href="#214">214</a> <em class="jxr_javadoccomment">     *</em>
-<a class="jxr_linenumber" name="215" href="#215">215</a> <em class="jxr_javadoccomment">     * @param userInput</em>
-<a class="jxr_linenumber" name="216" href="#216">216</a> <em class="jxr_javadoccomment">     *          raw user input (may throw RuntimeException on parse failure)</em>
-<a class="jxr_linenumber" name="217" href="#217">217</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="218" href="#218">218</a>     <strong class="jxr_keyword">void</strong> setLastRow(String userInput);
-<a class="jxr_linenumber" name="219" href="#219">219</a> 
-<a class="jxr_linenumber" name="220" href="#220">220</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="221" href="#221">221</a> <em class="jxr_javadoccomment">     * @param input</em>
-<a class="jxr_linenumber" name="222" href="#222">222</a> <em class="jxr_javadoccomment">     *          user or file input for row</em>
-<a class="jxr_linenumber" name="223" href="#223">223</a> <em class="jxr_javadoccomment">     * @return byte array representation of this row for HBase</em>
-<a class="jxr_linenumber" name="224" href="#224">224</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="225" href="#225">225</a>     byte[] strToRow(String input);
-<a class="jxr_linenumber" name="226" href="#226">226</a> 
-<a class="jxr_linenumber" name="227" href="#227">227</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="228" href="#228">228</a> <em class="jxr_javadoccomment">     * @param row</em>
-<a class="jxr_linenumber" name="229" href="#229">229</a> <em class="jxr_javadoccomment">     *          byte array representing a row in HBase</em>
-<a class="jxr_linenumber" name="230" href="#230">230</a> <em class="jxr_javadoccomment">     * @return String to use for debug &amp;amp; file printing</em>
-<a class="jxr_linenumber" name="231" href="#231">231</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="232" href="#232">232</a>     String rowToStr(byte[] row);
-<a class="jxr_linenumber" name="233" href="#233">233</a> 
-<a class="jxr_linenumber" name="234" href="#234">234</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="235" href="#235">235</a> <em class="jxr_javadoccomment">     * @return the separator character to use when storing / printing the row</em>
-<a class="jxr_linenumber" name="236" href="#236">236</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="237" href="#237">237</a>     String separator();
-<a class="jxr_linenumber" name="238" href="#238">238</a> 
-<a class="jxr_linenumber" name="239" href="#239">239</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="240" href="#240">240</a> <em class="jxr_javadoccomment">     * Set the first row</em>
-<a class="jxr_linenumber" name="241" href="#241">241</a> <em class="jxr_javadoccomment">     * @param userInput byte array of the row key.</em>
-<a class="jxr_linenumber" name="242" href="#242">242</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="243" href="#243">243</a>     <strong class="jxr_keyword">void</strong> setFirstRow(byte[] userInput);
-<a class="jxr_linenumber" name="244" href="#244">244</a> 
-<a class="jxr_linenumber" name="245" href="#245">245</a>     <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="246" href="#246">246</a> <em class="jxr_javadoccomment">     * Set the last row</em>
-<a class="jxr_linenumber" name="247" href="#247">247</a> <em class="jxr_javadoccomment">     * @param userInput byte array of the row key.</em>
-<a class="jxr_linenumber" name="248" href="#248">248</a> <em class="jxr_javadoccomment">     */</em>
-<a class="jxr_linenumber" name="249" href="#249">249</a>     <strong class="jxr_keyword">void</strong> setLastRow(byte[] userInput);
-<a class="jxr_linenumber" name="250" href="#250">250</a>   }
-<a class="jxr_linenumber" name="251" href="#251">251</a> 
-<a class="jxr_linenumber" name="252" href="#252">252</a>   <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="253" href="#253">253</a> <em class="jxr_javadoccomment">   * The main function for the RegionSplitter application. Common uses:</em>
-<a class="jxr_linenumber" name="254" href="#254">254</a> <em class="jxr_javadoccomment">   * &lt;p&gt;</em>
-<a class="jxr_linenumber" name="255" href="#255">255</a> <em class="jxr_javadoccomment">   * &lt;ul&gt;</em>
-<a class="jxr_linenumber" name="256" href="#256">256</a> <em class="jxr_javadoccomment">   * &lt;li&gt;create a table named 'myTable' with 60 pre-split regions containing 2</em>
-<a class="jxr_linenumber" name="257" href="#257">257</a> <em class="jxr_javadoccomment">   * column families 'test' &amp;amp; 'rs', assuming the keys are hex-encoded ASCII:</em>
-<a class="jxr_linenumber" name="258" href="#258">258</a> <em class="jxr_javadoccomment">   * &lt;ul&gt;</em>
-<a class="jxr_linenumber" name="259" href="#259">259</a> <em class="jxr_javadoccomment">   * &lt;li&gt;bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs</em>
-<a class="jxr_linenumber" name="260" href="#260">260</a> <em class="jxr_javadoccomment">   * myTable HexStringSplit</em>
-<a class="jxr_linenumber" name="261" href="#261">261</a> <em class="jxr_javadoccomment">   * &lt;/ul&gt;</em>
-<a class="jxr_linenumber" name="262" href="#262">262</a> <em class="jxr_javadoccomment">   * &lt;li&gt;perform a rolling split of 'myTable' (i.e. 60 =&amp;gt; 120 regions), # 2</em>
-<a class="jxr_linenumber" name="263" href="#263">263</a> <em class="jxr_javadoccomment">   * outstanding splits at a time, assuming keys are uniformly distributed</em>
-<a class="jxr_linenumber" name="264" href="#264">264</a> <em class="jxr_javadoccomment">   * bytes:</em>
-<a class="jxr_linenumber" name="265" href="#265">265</a> <em class="jxr_javadoccomment">   * &lt;ul&gt;</em>
-<a class="jxr_linenumber" name="266" href="#266">266</a> <em class="jxr_javadoccomment">   * &lt;li&gt;bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable</em>
-<a class="jxr_linenumber" name="267" href="#267">267</a> <em class="jxr_javadoccomment">   * UniformSplit</em>
+<a class="jxr_linenumber" name="31" href="#31">31</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.CommandLine;
+<a class="jxr_linenumber" name="32" href="#32">32</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.GnuParser;
+<a class="jxr_linenumber" name="33" href="#33">33</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.HelpFormatter;
+<a class="jxr_linenumber" name="34" href="#34">34</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.OptionBuilder;
+<a class="jxr_linenumber" name="35" href="#35">35</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.Options;
+<a class="jxr_linenumber" name="36" href="#36">36</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.cli.ParseException;
+<a class="jxr_linenumber" name="37" href="#37">37</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.lang.ArrayUtils;
+<a class="jxr_linenumber" name="38" href="#38">38</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.lang.StringUtils;
+<a class="jxr_linenumber" name="39" href="#39">39</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.logging.Log;
+<a class="jxr_linenumber" name="40" href="#40">40</a>  <strong class="jxr_keyword">import</strong> org.apache.commons.logging.LogFactory;
+<a class="jxr_linenumber" name="41" href="#41">41</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.conf.Configuration;
+<a class="jxr_linenumber" name="42" href="#42">42</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataInputStream;
+<a class="jxr_linenumber" name="43" href="#43">43</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FSDataOutputStream;
+<a class="jxr_linenumber" name="44" href="#44">44</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.FileSystem;
+<a class="jxr_linenumber" name="45" href="#45">45</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.fs.Path;
+<a class="jxr_linenumber" name="46" href="#46">46</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ClusterStatus;
+<a class="jxr_linenumber" name="47" href="#47">47</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HBaseConfiguration;
+<a class="jxr_linenumber" name="48" href="#48">48</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HColumnDescriptor;
+<a class="jxr_linenumber" name="49" href="#49">49</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionInfo;
+<a class="jxr_linenumber" name="50" href="#50">50</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HRegionLocation;
+<a class="jxr_linenumber" name="51" href="#51">51</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.HTableDescriptor;
+<a class="jxr_linenumber" name="52" href="#52">52</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.MetaTableAccessor;
+<a class="jxr_linenumber" name="53" href="#53">53</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.ServerName;
+<a class="jxr_linenumber" name="54" href="#54">54</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.TableName;
+<a class="jxr_linenumber" name="55" href="#55">55</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.classification.InterfaceAudience;
+<a class="jxr_linenumber" name="56" href="#56">56</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Admin;
+<a class="jxr_linenumber" name="57" href="#57">57</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ClusterConnection;
+<a class="jxr_linenumber" name="58" href="#58">58</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Connection;
+<a class="jxr_linenumber" name="59" href="#59">59</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.ConnectionFactory;
+<a class="jxr_linenumber" name="60" href="#60">60</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.NoServerForRegionException;
+<a class="jxr_linenumber" name="61" href="#61">61</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.RegionLocator;
+<a class="jxr_linenumber" name="62" href="#62">62</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.client.Table;
+<a class="jxr_linenumber" name="63" href="#63">63</a>  <strong class="jxr_keyword">import</strong> org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+<a class="jxr_linenumber" name="64" href="#64">64</a>
+<a class="jxr_linenumber" name="65" href="#65">65</a>  <strong class="jxr_keyword">import</strong> com.google.common.base.Preconditions;
+<a class="jxr_linenumber" name="66" href="#66">66</a>  <strong class="jxr_keyword">import</strong> com.google.common.collect.Lists;
+<a class="jxr_linenumber" name="67" href="#67">67</a>  <strong class="jxr_keyword">import</strong> com.google.common.collect.Maps;
+<a class="jxr_linenumber" name="68" href="#68">68</a>  <strong class="jxr_keyword">import</strong> com.google.common.collect.Sets;
+<a class="jxr_linenumber" name="69" href="#69">69</a>
+<a class="jxr_linenumber" name="70" href="#70">70</a>  <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="71" href="#71">71</a>  <em class="jxr_javadoccomment"> * The {@link RegionSplitter} class provides several utilities to help in the</em>
+<a class="jxr_linenumber" name="72" href="#72">72</a>  <em class="jxr_javadoccomment"> * administration lifecycle for developers who choose to manually split regions</em>
+<a class="jxr_linenumber" name="73" href="#73">73</a>  <em class="jxr_javadoccomment"> * instead of having HBase handle that automatically. The most useful utilities</em>
+<a class="jxr_linenumber" name="74" href="#74">74</a>  <em class="jxr_javadoccomment"> * are:</em>
+<a class="jxr_linenumber" name="75" href="#75">75</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="76" href="#76">76</a>  <em class="jxr_javadoccomment"> * &lt;ul&gt;</em>
+<a class="jxr_linenumber" name="77" href="#77">77</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Create a table with a specified number of pre-split regions</em>
+<a class="jxr_linenumber" name="78" href="#78">78</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Execute a rolling split of all regions on an existing table</em>
+<a class="jxr_linenumber" name="79" href="#79">79</a>  <em class="jxr_javadoccomment"> * &lt;/ul&gt;</em>
+<a class="jxr_linenumber" name="80" href="#80">80</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="81" href="#81">81</a>  <em class="jxr_javadoccomment"> * Both operations can be safely done on a live server.</em>
+<a class="jxr_linenumber" name="82" href="#82">82</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="83" href="#83">83</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; How do I turn off automatic splitting? &lt;br&gt;</em>
+<a class="jxr_linenumber" name="84" href="#84">84</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Automatic splitting is determined by the configuration value</em>
+<a class="jxr_linenumber" name="85" href="#85">85</a>  <em class="jxr_javadoccomment"> * &lt;i&gt;HConstants.HREGION_MAX_FILESIZE&lt;/i&gt;. It is not recommended that you set this</em>
+<a class="jxr_linenumber" name="86" href="#86">86</a>  <em class="jxr_javadoccomment"> * to Long.MAX_VALUE in case you forget about manual splits. A suggested setting</em>
+<a class="jxr_linenumber" name="87" href="#87">87</a>  <em class="jxr_javadoccomment"> * is 100GB, which would result in &amp;gt; 1hr major compactions if reached.</em>
+<a class="jxr_linenumber" name="88" href="#88">88</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="89" href="#89">89</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; Why did the original authors decide to manually split? &lt;br&gt;</em>
+<a class="jxr_linenumber" name="90" href="#90">90</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Specific workload characteristics of our use case allowed us</em>
+<a class="jxr_linenumber" name="91" href="#91">91</a>  <em class="jxr_javadoccomment"> * to benefit from a manual split system.</em>
+<a class="jxr_linenumber" name="92" href="#92">92</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="93" href="#93">93</a>  <em class="jxr_javadoccomment"> * &lt;ul&gt;</em>
+<a class="jxr_linenumber" name="94" href="#94">94</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Data (~1k) that would grow instead of being replaced</em>
+<a class="jxr_linenumber" name="95" href="#95">95</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;Data growth was roughly uniform across all regions</em>
+<a class="jxr_linenumber" name="96" href="#96">96</a>  <em class="jxr_javadoccomment"> * &lt;li&gt;OLTP workload. Data loss is a big deal.</em>
+<a class="jxr_linenumber" name="97" href="#97">97</a>  <em class="jxr_javadoccomment"> * &lt;/ul&gt;</em>
+<a class="jxr_linenumber" name="98" href="#98">98</a>  <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="99" href="#99">99</a>  <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; Why is manual splitting good for this workload? &lt;br&gt;</em>
+<a class="jxr_linenumber" name="100" href="#100">100</a> <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Although automated splitting is not a bad option, there are</em>
+<a class="jxr_linenumber" name="101" href="#101">101</a> <em class="jxr_javadoccomment"> * benefits to manual splitting.</em>
+<a class="jxr_linenumber" name="102" href="#102">102</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="103" href="#103">103</a> <em class="jxr_javadoccomment"> * &lt;ul&gt;</em>
+<a class="jxr_linenumber" name="104" href="#104">104</a> <em class="jxr_javadoccomment"> * &lt;li&gt;With growing amounts of data, splits will continually be needed. Since</em>
+<a class="jxr_linenumber" name="105" href="#105">105</a> <em class="jxr_javadoccomment"> * you always know exactly what regions you have, long-term debugging and</em>
+<a class="jxr_linenumber" name="106" href="#106">106</a> <em class="jxr_javadoccomment"> * profiling is much easier with manual splits. It is hard to trace the logs to</em>
+<a class="jxr_linenumber" name="107" href="#107">107</a> <em class="jxr_javadoccomment"> * understand region level problems if it keeps splitting and getting renamed.</em>
+<a class="jxr_linenumber" name="108" href="#108">108</a> <em class="jxr_javadoccomment"> * &lt;li&gt;Data offlining bugs + unknown number of split regions == oh crap! If a</em>
+<a class="jxr_linenumber" name="109" href="#109">109</a> <em class="jxr_javadoccomment"> * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and</em>
+<a class="jxr_linenumber" name="110" href="#110">110</a> <em class="jxr_javadoccomment"> * you notice it a day or so later, you can be assured that the regions</em>
+<a class="jxr_linenumber" name="111" href="#111">111</a> <em class="jxr_javadoccomment"> * specified in these files are the same as the current regions and you have</em>
+<a class="jxr_linenumber" name="112" href="#112">112</a> <em class="jxr_javadoccomment"> * fewer headaches trying to restore/replay your data.</em>
+<a class="jxr_linenumber" name="113" href="#113">113</a> <em class="jxr_javadoccomment"> * &lt;li&gt;You can finely tune your compaction algorithm. With roughly uniform data</em>
+<a class="jxr_linenumber" name="114" href="#114">114</a> <em class="jxr_javadoccomment"> * growth, it's easy to cause split / compaction storms as the regions all</em>
+<a class="jxr_linenumber" name="115" href="#115">115</a> <em class="jxr_javadoccomment"> * roughly hit the same data size at the same time. With manual splits, you can</em>
+<a class="jxr_linenumber" name="116" href="#116">116</a> <em class="jxr_javadoccomment"> * let staggered, time-based major compactions spread out your network IO load.</em>
+<a class="jxr_linenumber" name="117" href="#117">117</a> <em class="jxr_javadoccomment"> * &lt;/ul&gt;</em>
+<a class="jxr_linenumber" name="118" href="#118">118</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="119" href="#119">119</a> <em class="jxr_javadoccomment"> * &lt;b&gt;Question:&lt;/b&gt; What's the optimal number of pre-split regions to create? &lt;br&gt;</em>
+<a class="jxr_linenumber" name="120" href="#120">120</a> <em class="jxr_javadoccomment"> * &lt;b&gt;Answer:&lt;/b&gt; Mileage will vary depending upon your application.</em>
+<a class="jxr_linenumber" name="121" href="#121">121</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="122" href="#122">122</a> <em class="jxr_javadoccomment"> * The short answer for our application is that we started with 10 pre-split</em>
+<a class="jxr_linenumber" name="123" href="#123">123</a> <em class="jxr_javadoccomment"> * regions / server and watched our data growth over time. It's better to err on</em>
+<a class="jxr_linenumber" name="124" href="#124">124</a> <em class="jxr_javadoccomment"> * the side of too few regions and rolling split later.</em>
+<a class="jxr_linenumber" name="125" href="#125">125</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="126" href="#126">126</a> <em class="jxr_javadoccomment"> * The more complicated answer is that this depends upon the largest storefile</em>
+<a class="jxr_linenumber" name="127" href="#127">127</a> <em class="jxr_javadoccomment"> * in your region. With a growing data size, this will get larger over time. You</em>
+<a class="jxr_linenumber" name="128" href="#128">128</a> <em class="jxr_javadoccomment"> * want the largest region to be just big enough that the</em>
+<a class="jxr_linenumber" name="129" href="#129">129</a> <em class="jxr_javadoccomment"> * {@link org.apache.hadoop.hbase.regionserver.HStore} compact</em>
+<a class="jxr_linenumber" name="130" href="#130">130</a> <em class="jxr_javadoccomment"> * selection algorithm only compacts it due to a timed major. If you don't, your</em>
+<a class="jxr_linenumber" name="131" href="#131">131</a> <em class="jxr_javadoccomment"> * cluster can be prone to compaction storms as the algorithm decides to run</em>
+<a class="jxr_linenumber" name="132" href="#132">132</a> <em class="jxr_javadoccomment"> * major compactions on a large series of regions all at once. Note that</em>
+<a class="jxr_linenumber" name="133" href="#133">133</a> <em class="jxr_javadoccomment"> * compaction storms are due to the uniform data growth, not the manual split</em>
+<a class="jxr_linenumber" name="134" href="#134">134</a> <em class="jxr_javadoccomment"> * decision.</em>
+<a class="jxr_linenumber" name="135" href="#135">135</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="136" href="#136">136</a> <em class="jxr_javadoccomment"> * If you pre-split your regions too thin, you can increase the major compaction</em>
+<a class="jxr_linenumber" name="137" href="#137">137</a> <em class="jxr_javadoccomment"> * interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size</em>
+<a class="jxr_linenumber" name="138" href="#138">138</a> <em class="jxr_javadoccomment"> * grows too large, use this script to perform a network IO safe rolling split</em>
+<a class="jxr_linenumber" name="139" href="#139">139</a> <em class="jxr_javadoccomment"> * of all regions.</em>
+<a class="jxr_linenumber" name="140" href="#140">140</a> <em class="jxr_javadoccomment"> */</em>
+<a class="jxr_linenumber" name="141" href="#141">141</a> @InterfaceAudience.Private
+<a class="jxr_linenumber" name="142" href="#142">142</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">RegionSplitter</a> {
+<a class="jxr_linenumber" name="143" href="#143">143</a>   <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> Log LOG = LogFactory.getLog(RegionSplitter.<strong class="jxr_keyword">class</strong>);
+<a class="jxr_linenumber" name="144" href="#144">144</a>
+<a class="jxr_linenumber" name="145" href="#145">145</a>   <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="146" href="#146">146</a> <em class="jxr_javadoccomment">   * A generic interface for the RegionSplitter code to use for all its</em>
+<a class="jxr_linenumber" name="147" href="#147">147</a> <em class="jxr_javadoccomment">   * functionality. Note that the original authors of this code use</em>
+<a class="jxr_linenumber" name="148" href="#148">148</a> <em class="jxr_javadoccomment">   * {@link HexStringSplit} to partition their table and set it as default, but</em>
+<a class="jxr_linenumber" name="149" href="#149">149</a> <em class="jxr_javadoccomment">   * provided this for your custom algorithm. To use, create a new derived class</em>
+<a class="jxr_linenumber" name="150" href="#150">150</a> <em class="jxr_javadoccomment">   * from this interface and call {@link RegionSplitter#createPresplitTable} or</em>
+<a class="jxr_linenumber" name="151" href="#151">151</a> <em class="jxr_javadoccomment">   * RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with the</em>
+<a class="jxr_linenumber" name="152" href="#152">152</a> <em class="jxr_javadoccomment">   * argument splitClassName giving the name of your class.</em>
+<a class="jxr_linenumber" name="153" href="#153">153</a> <em class="jxr_javadoccomment">   */</em>
+<a class="jxr_linenumber" name="154" href="#154">154</a>   <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">interface</strong> <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> {
+<a class="jxr_linenumber" name="155" href="#155">155</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="156" href="#156">156</a> <em class="jxr_javadoccomment">     * Split a pre-existing region into 2 regions.</em>
+<a class="jxr_linenumber" name="157" href="#157">157</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="158" href="#158">158</a> <em class="jxr_javadoccomment">     * @param start</em>
+<a class="jxr_linenumber" name="159" href="#159">159</a> <em class="jxr_javadoccomment">     *          first row (inclusive)</em>
+<a class="jxr_linenumber" name="160" href="#160">160</a> <em class="jxr_javadoccomment">     * @param end</em>
+<a class="jxr_linenumber" name="161" href="#161">161</a> <em class="jxr_javadoccomment">     *          last row (exclusive)</em>
+<a class="jxr_linenumber" name="162" href="#162">162</a> <em class="jxr_javadoccomment">     * @return the split row to use</em>
+<a class="jxr_linenumber" name="163" href="#163">163</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="164" href="#164">164</a>     byte[] split(byte[] start, byte[] end);
+<a class="jxr_linenumber" name="165" href="#165">165</a>
+<a class="jxr_linenumber" name="166" href="#166">166</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="167" href="#167">167</a> <em class="jxr_javadoccomment">     * Split an entire table.</em>
+<a class="jxr_linenumber" name="168" href="#168">168</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="169" href="#169">169</a> <em class="jxr_javadoccomment">     * @param numRegions</em>
+<a class="jxr_linenumber" name="170" href="#170">170</a> <em class="jxr_javadoccomment">     *          number of regions to split the table into</em>
+<a class="jxr_linenumber" name="171" href="#171">171</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="172" href="#172">172</a> <em class="jxr_javadoccomment">     * @throws RuntimeException</em>
+<a class="jxr_linenumber" name="173" href="#173">173</a> <em class="jxr_javadoccomment">     *           user input is validated at this time. may throw a runtime</em>
+<a class="jxr_linenumber" name="174" href="#174">174</a> <em class="jxr_javadoccomment">     *           exception in response to a parse failure</em>
+<a class="jxr_linenumber" name="175" href="#175">175</a> <em class="jxr_javadoccomment">     * @return array of split keys for the initial regions of the table. The</em>
+<a class="jxr_linenumber" name="176" href="#176">176</a> <em class="jxr_javadoccomment">     *         length of the returned array should be numRegions-1.</em>
+<a class="jxr_linenumber" name="177" href="#177">177</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="178" href="#178">178</a>     byte[][] split(<strong class="jxr_keyword">int</strong> numRegions);
+<a class="jxr_linenumber" name="179" href="#179">179</a>
+<a class="jxr_linenumber" name="180" href="#180">180</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="181" href="#181">181</a> <em class="jxr_javadoccomment">     * In HBase, the first row is represented by an empty byte array. This might</em>
+<a class="jxr_linenumber" name="182" href="#182">182</a> <em class="jxr_javadoccomment">     * cause problems with your split algorithm or row printing. All your APIs</em>
+<a class="jxr_linenumber" name="183" href="#183">183</a> <em class="jxr_javadoccomment">     * will be passed firstRow() instead of empty array.</em>
+<a class="jxr_linenumber" name="184" href="#184">184</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="185" href="#185">185</a> <em class="jxr_javadoccomment">     * @return your representation of your first row</em>
+<a class="jxr_linenumber" name="186" href="#186">186</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="187" href="#187">187</a>     byte[] firstRow();
+<a class="jxr_linenumber" name="188" href="#188">188</a>
+<a class="jxr_linenumber" name="189" href="#189">189</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="190" href="#190">190</a> <em class="jxr_javadoccomment">     * In HBase, the last row is represented by an empty byte array. This might</em>
+<a class="jxr_linenumber" name="191" href="#191">191</a> <em class="jxr_javadoccomment">     * cause problems with your split algorithm or row printing. All your APIs</em>
+<a class="jxr_linenumber" name="192" href="#192">192</a> <em class="jxr_javadoccomment">     * will be passed lastRow() instead of empty array.</em>
+<a class="jxr_linenumber" name="193" href="#193">193</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="194" href="#194">194</a> <em class="jxr_javadoccomment">     * @return your representation of your last row</em>
+<a class="jxr_linenumber" name="195" href="#195">195</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="196" href="#196">196</a>     byte[] lastRow();
+<a class="jxr_linenumber" name="197" href="#197">197</a>
+<a class="jxr_linenumber" name="198" href="#198">198</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="199" href="#199">199</a> <em class="jxr_javadoccomment">     * In HBase, the first row is represented by an empty byte array. Set this</em>
+<a class="jxr_linenumber" name="200" href="#200">200</a> <em class="jxr_javadoccomment">     * value to help the split code understand how to evenly divide the first</em>
+<a class="jxr_linenumber" name="201" href="#201">201</a> <em class="jxr_javadoccomment">     * region.</em>
+<a class="jxr_linenumber" name="202" href="#202">202</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="203" href="#203">203</a> <em class="jxr_javadoccomment">     * @param userInput</em>
+<a class="jxr_linenumber" name="204" href="#204">204</a> <em class="jxr_javadoccomment">     *          raw user input (may throw RuntimeException on parse failure)</em>
+<a class="jxr_linenumber" name="205" href="#205">205</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="206" href="#206">206</a>     <strong class="jxr_keyword">void</strong> setFirstRow(String userInput);
+<a class="jxr_linenumber" name="207" href="#207">207</a>
+<a class="jxr_linenumber" name="208" href="#208">208</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="209" href="#209">209</a> <em class="jxr_javadoccomment">     * In HBase, the last row is represented by an empty byte array. Set this</em>
+<a class="jxr_linenumber" name="210" href="#210">210</a> <em class="jxr_javadoccomment">     * value to help the split code understand how to evenly divide the last</em>
+<a class="jxr_linenumber" name="211" href="#211">211</a> <em class="jxr_javadoccomment">     * region. Note that this last row is inclusive for all rows sharing the</em>
+<a class="jxr_linenumber" name="212" href="#212">212</a> <em class="jxr_javadoccomment">     * same prefix.</em>
+<a class="jxr_linenumber" name="213" href="#213">213</a> <em class="jxr_javadoccomment">     *</em>
+<a class="jxr_linenumber" name="214" href="#214">214</a> <em class="jxr_javadoccomment">     * @param userInput</em>
+<a class="jxr_linenumber" name="215" href="#215">215</a> <em class="jxr_javadoccomment">     *          raw user input (may throw RuntimeException on parse failure)</em>
+<a class="jxr_linenumber" name="216" href="#216">216</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="217" href="#217">217</a>     <strong class="jxr_keyword">void</strong> setLastRow(String userInput);
+<a class="jxr_linenumber" name="218" href="#218">218</a>
+<a class="jxr_linenumber" name="219" href="#219">219</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="220" href="#220">220</a> <em class="jxr_javadoccomment">     * @param input</em>
+<a class="jxr_linenumber" name="221" href="#221">221</a> <em class="jxr_javadoccomment">     *          user or file input for row</em>
+<a class="jxr_linenumber" name="222" href="#222">222</a> <em class="jxr_javadoccomment">     * @return byte array representation of this row for HBase</em>
+<a class="jxr_linenumber" name="223" href="#223">223</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="224" href="#224">224</a>     byte[] strToRow(String input);
+<a class="jxr_linenumber" name="225" href="#225">225</a>
+<a class="jxr_linenumber" name="226" href="#226">226</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="227" href="#227">227</a> <em class="jxr_javadoccomment">     * @param row</em>
+<a class="jxr_linenumber" name="228" href="#228">228</a> <em class="jxr_javadoccomment">     *          byte array representing a row in HBase</em>
+<a class="jxr_linenumber" name="229" href="#229">229</a> <em class="jxr_javadoccomment">     * @return String to use for debug &amp;amp; file printing</em>
+<a class="jxr_linenumber" name="230" href="#230">230</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="231" href="#231">231</a>     String rowToStr(byte[] row);
+<a class="jxr_linenumber" name="232" href="#232">232</a>
+<a class="jxr_linenumber" name="233" href="#233">233</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="234" href="#234">234</a> <em class="jxr_javadoccomment">     * @return the separator character to use when storing / printing the row</em>
+<a class="jxr_linenumber" name="235" href="#235">235</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="236" href="#236">236</a>     String separator();
+<a class="jxr_linenumber" name="237" href="#237">237</a>
+<a class="jxr_linenumber" name="238" href="#238">238</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="239" href="#239">239</a> <em class="jxr_javadoccomment">     * Set the first row</em>
+<a class="jxr_linenumber" name="240" href="#240">240</a> <em class="jxr_javadoccomment">     * @param userInput byte array of the row key.</em>
+<a class="jxr_linenumber" name="241" href="#241">241</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="242" href="#242">242</a>     <strong class="jxr_keyword">void</strong> setFirstRow(byte[] userInput);
+<a class="jxr_linenumber" name="243" href="#243">243</a>
+<a class="jxr_linenumber" name="244" href="#244">244</a>     <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="245" href="#245">245</a> <em class="jxr_javadoccomment">     * Set the last row</em>
+<a class="jxr_linenumber" name="246" href="#246">246</a> <em class="jxr_javadoccomment">     * @param userInput byte array of the row key.</em>
+<a class="jxr_linenumber" name="247" href="#247">247</a> <em class="jxr_javadoccomment">     */</em>
+<a class="jxr_linenumber" name="248" href="#248">248</a>     <strong class="jxr_keyword">void</strong> setLastRow(byte[] userInput);
+<a class="jxr_linenumber" name="249" href="#249">249</a>   }
+<a class="jxr_linenumber" name="250" href="#250">250</a>
+<a class="jxr_linenumber" name="251" href="#251">251</a>   <em class="jxr_javadoccomment">/**</em>
+<a class="jxr_linenumber" name="252" href="#252">252</a> <em class="jxr_javadoccomment">   * The main function for the RegionSplitter application. Common uses:</em>
+<a class="jxr_linenumber" name="253" href="#253">253</a> <em class="jxr_javadoccomment">   * &lt;p&gt;</em>
+<a class="jxr_linenumber" name="254" href="#254">254</a> <em class="jxr_javadoccomment">   * &lt;ul&gt;</em>
+<a class="jxr_linenumber" name="255" href="#255">255</a> <em class="jxr_javadoccomment">   * &lt;li&gt;create a table named 'myTable' with 60 pre-split regions containing 2</em>
+<a class="jxr_linenumber" name="256" href="#256">256</a> <em class="jxr_javadoccomment">   * column families 'test' &amp;amp; 'rs', assuming the keys are hex-encoded ASCII:</em>
+<a class="jxr_linenumber" name="257" href="#257">257</a> <em class="jxr_javadoccomment">   * &lt;ul&gt;</em>
+<a class="jxr_linenumber" name="258" href="#258">258</a> <em class="jxr_javadoccomment">   * &lt;li&gt;bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs</em>
+<a class="jxr_linenumber" name="259" href="#259">259</a> <em class="jxr_javadoccomment">   * myTable HexStringSplit</em>
+<a class="jxr_linenumber" name="260" href="#260">260</a> <em class="jxr_javadoccomment">   * &lt;/ul&gt;</em>
+<a class="jxr_linenumber" name="261" href="#261">261</a> <em class="jxr_javadoccomment">   * &lt;li&gt;perform a rolling split of 'myTable' (i.e. 60 =&amp;gt; 120 regions), # 2</em>
+<a class="jxr_linenumber" name="262" href="#262">262</a> <em class="jxr_javadoccomment">   * outstanding splits at a time, assuming keys are uniformly distributed</em>
+<a class="jxr_linenumber" name="263" href="#263">263</a> <em class="jxr_javadoccomment">   * bytes:</em>
+<a class="jxr_linenumber" name="264" href="#264">264</a> <em class="jxr_javadoccomment">   * &lt;ul&gt;</em>
+<a class="jxr_linenumber" name="265" href="#265">265</a> <em class="jxr_javadoccomment">   * &lt;li&gt;bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable</em>
+<a class="jxr_linenumber" name="266" href="#266">266</a> <em class="jxr_javadoccomment">   * UniformSplit</em>
+<a class="jxr_linenumber" name="267" href="#267">267</a> <em class="jxr_javadoccomment">   * &lt;/ul&gt;</em>
 <a class="jxr_linenumber" name="268" href="#268">268</a> <em class="jxr_javadoccomment">   * &lt;/ul&gt;</em>
-<a class="jxr_linenumber" name="269" href="#269">269</a> <em class="jxr_javadoccomment">   * &lt;/ul&gt;</em>
-<a class="jxr_linenumber" name="270" href="#270">270</a> <em class="jxr_javadoccomment">   *</em>
-<a class="jxr_linenumber" name="271" href="#271">271</a> <em class="jxr_javadoccomment">   * There are two SplitAlgorithms built into RegionSplitter, HexStringSplit</em>
-<a class="jxr_linenumber" name="272" href="#272">272</a> <em class="jxr_javadoccomment">   * and UniformSplit. These are different strategies for choosing region</em>
-<a class="jxr_linenumber" name="273" href="#273">273</a> <em class="jxr_javadoccomment">   * boundaries. See their source code for details.</em>
-<a class="jxr_linenumber" name="274" href="#274">274</a> <em class="jxr_javadoccomment">   *</em>
-<a class="jxr_linenumber" name="275" href="#275">275</a> <em class="jxr_javadoccomment">   * @param args</em>
-<a class="jxr_linenumber" name="276" href="#276">276</a> <em class="jxr_javadoccomment">   *          Usage: RegionSplitter &amp;lt;TABLE&amp;gt; &amp;lt;SPLITALGORITHM&amp;gt;</em>
-<a class="jxr_linenumber" name="277" href="#277">277</a> <em class="jxr_javadoccomment">   *          &amp;lt;-c &amp;lt;# regions&amp;gt; -f &amp;lt;family:family:...&amp;gt; | -r</em>
-<a class="jxr_linenumber" name="278" href="#278">278</a> <em class="jxr_javadoccomment">   *          [-o &amp;lt;# outstanding splits&amp;gt;]&amp;gt;</em>
-<a class="jxr_linenumber" name="279" href="#279">279</a> <em class="jxr_javadoccomment">   *          [-D &amp;lt;conf.param=value&amp;gt;]</em>
-<a class="jxr_linenumber" name="280" href="#280">280</a> <em class="jxr_javadoccomment">   * @throws IOException</em>
-<a class="jxr_linenumber" name="281" href="#281">281</a> <em class="jxr_javadoccomment">   *           HBase IO problem</em>
-<a class="jxr_linenumber" name="282" href="#282">282</a> <em class="jxr_javadoccomment">   * @throws InterruptedException</em>
-<a class="jxr_linenumber" name="283" href="#283">283</a> <em class="jxr_javadoccomment">   *           user requested exit</em>
-<a class="jxr_linenumber" name="284" href="#284">284</a> <em class="jxr_javadoccomment">   * @throws ParseException</em>
-<a class="jxr_linenumber" name="285" href="#285">285</a> <em class="jxr_javadoccomment">   *           problem parsing user input</em>
-<a class="jxr_linenumber" name="286" href="#286">286</a> <em class="jxr_javadoccomment">   */</em>
-<a class="jxr_linenumber" name="287" href="#287">287</a>   @SuppressWarnings(<span class="jxr_string">"static-access"</span>)
-<a class="jxr_linenumber" name="288" href="#288">288</a>   <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">void</strong> main(String[] args) <strong class="jxr_keyword">throws</strong> IOException,
-<a class="jxr_linenumber" name="289" href="#289">289</a>       InterruptedException, ParseException {
-<a class="jxr_linenumber" name="290" href="#290">290</a>     Configuration conf = HBaseConfiguration.create();
-<a class="jxr_linenumber" name="291" href="#291">291</a> 
-<a class="jxr_linenumber" name="292" href="#292">292</a>     <em class="jxr_comment">// parse user input</em>
-<a class="jxr_linenumber" name="293" href="#293">293</a>     Options opt = <strong class="jxr_keyword">new</strong> Options();
-<a class="jxr_linenumber" name="294" href="#294">294</a>     opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"property=value"</span>).hasArg()
-<a class="jxr_linenumber" name="295" href="#295">295</a>         .withDescription(<span class="jxr_string">"Override HBase Configuration Settings"</span>).create(<span class="jxr_string">"D"</span>));
-<a class="jxr_linenumber" name="296" href="#296">296</a>     opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"region count"</span>).hasArg()
-<a class="jxr_linenumber" name="297" href="#297">297</a>         .withDescription(
-<a class="jxr_linenumber" name="298" href="#298">298</a>             <span class="jxr_string">"Create a new table with a pre-split number of regions"</span>)
-<a class="jxr_linenumber" name="299" href="#299">299</a>         .create(<span class="jxr_string">"c"</span>));
-<a class="jxr_linenumber" name="300" href="#300">300</a>     opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"family:family:..."</span>).hasArg()
-<a class="jxr_linenumber" name="301" href="#301">301</a>         .withDescription(
-<a class="jxr_linenumber" name="302" href="#302">302</a>             <span class="jxr_string">"Column Families to create with new table.  Required with -c"</span>)
-<a class="jxr_linenumber" name="303" href="#303">303</a>         .create(<span class="jxr_string">"f"</span>));
-<a class="jxr_linenumber" name="304" href="#304">304</a>     opt.addOption(<span class="jxr_string">"h"</span>, false, <span class="jxr_string">"Print this usage help"</span>);
-<a class="jxr_linenumber" name="305" href="#305">305</a>     opt.addOption(<span class="jxr_string">"r"</span>, false, <span class="jxr_string">"Perform a rolling split of an existing region"</span>);
-<a class="jxr_linenumber" name="306" href="#306">306</a>     opt.addOption(OptionBuilder.withArgName(<span class="jxr_string">"count"</span>).hasArg().withDescription(
-<a class="jxr_linenumber" name="307" href="#307">307</a>         <span class="jxr_string">"Max outstanding splits that have unfinished major compactions"</span>)
-<a class="jxr_linenumber" name="308" href="#308">308</a>         .create(<span class="jxr_string">"o"</span>));
-<a class="jxr_linenumber" name="309" href="#309">309</a>     opt.addOption(<strong class="jxr_keyword">null</strong>, <span class="jxr_string">"firstrow"</span>, <strong class="jxr_keyword">true</strong>,
-<a class="jxr_linenumber" name="310" href="#310">310</a>         <span class="jxr_string">"First Row in Table for Split Algorithm"</span>);
-<a class="jxr_linenumber" name="311" href="#311">311</a>     opt.addOption(<strong class="jxr_keyword">null</strong>, <span class="jxr_string">"lastrow"</span>, <strong class="jxr_keyword">true</strong>,
-<a class="jxr_linenumber" name="312" href="#312">312</a>         <span class="jxr_string">"Last Row in Table for Split Algorithm"</span>);
-<a class="jxr_linenumber" name="313" href="#313">313</a>     opt.addOption(<strong class="jxr_keyword">null</strong>, <span class="jxr_string">"risky"</span>, false,
-<a class="jxr_linenumber" name="314" href="#314">314</a>         <span class="jxr_string">"Skip verification steps to complete quickly."</span>
-<a class="jxr_linenumber" name="315" href="#315">315</a>             + <span class="jxr_string">"STRONGLY DISCOURAGED for production systems.  "</span>);
-<a class="jxr_linenumber" name="316" href="#316">316</a>     CommandLine cmd = <strong class="jxr_keyword">new</strong> GnuParser().parse(opt, args);
-<a class="jxr_linenumber" name="317" href="#317">317</a> 
-<a class="jxr_linenumber" name="318" href="#318">318</a>     <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"D"</span>)) {
-<a class="jxr_linenumber" name="319" href="#319">319</a>       <strong class="jxr_keyword">for</strong> (String confOpt : cmd.getOptionValues(<span class="jxr_string">"D"</span>)) {
-<a class="jxr_linenumber" name="320" href="#320">320</a>         String[] kv = confOpt.split(<span class="jxr_string">"="</span>, 2);
-<a class="jxr_linenumber" name="321" href="#321">321</a>         <strong class="jxr_keyword">if</strong> (kv.length == 2) {
-<a class="jxr_linenumber" name="322" href="#322">322</a>           conf.set(kv[0], kv[1]);
-<a class="jxr_linenumber" name="323" href="#323">323</a>           LOG.debug(<span class="jxr_string">"-D configuration override: "</span> + kv[0] + <span class="jxr_string">"="</span> + kv[1]);
-<a class="jxr_linenumber" name="324" href="#324">324</a>         } <strong class="jxr_keyword">else</strong> {
-<a class="jxr_linenumber" name="325" href="#325">325</a>           <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> ParseException(<span class="jxr_string">"-D option format invalid: "</span> + confOpt);
-<a class="jxr_linenumber" name="326" href="#326">326</a>         }
-<a class="jxr_linenumber" name="327" href="#327">327</a>       }
-<a class="jxr_linenumber" name="328" href="#328">328</a>     }
-<a class="jxr_linenumber" name="329" href="#329">329</a> 
-<a class="jxr_linenumber" name="330" href="#330">330</a>     <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"risky"</span>)) {
-<a class="jxr_linenumber" name="331" href="#331">331</a>       conf.setBoolean(<span class="jxr_string">"split.verify"</span>, false);
-<a class="jxr_linenumber" name="332" href="#332">332</a>     }
-<a class="jxr_linenumber" name="333" href="#333">333</a> 
-<a class="jxr_linenumber" name="334" href="#334">334</a>     <strong class="jxr_keyword">boolean</strong> createTable = cmd.hasOption(<span class="jxr_string">"c"</span>) &amp;&amp; cmd.hasOption(<span class="jxr_string">"f"</span>);
-<a class="jxr_linenumber" name="335" href="#335">335</a>     <strong class="jxr_keyword">boolean</strong> rollingSplit = cmd.hasOption(<span class="jxr_string">"r"</span>);
-<a class="jxr_linenumber" name="336" href="#336">336</a>     <strong class="jxr_keyword">boolean</strong> oneOperOnly = createTable ^ rollingSplit;
-<a class="jxr_linenumber" name="337" href="#337">337</a> 
-<a class="jxr_linenumber" name="338" href="#338">338</a>     <strong class="jxr_keyword">if</strong> (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption(<span class="jxr_string">"h"</span>)) {
-<a class="jxr_linenumber" name="339" href="#339">339</a>       <strong class="jxr_keyword">new</strong> HelpFormatter().printHelp(<span class="jxr_string">"RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt;\n"</span>+
-<a class="jxr_linenumber" name="340" href="#340">340</a>           <span class="jxr_string">"SPLITALGORITHM is a java class name of a class implementing "</span> +
-<a class="jxr_linenumber" name="341" href="#341">341</a>           <span class="jxr_string">"SplitAlgorithm, or one of the special strings HexStringSplit "</span> +
-<a class="jxr_linenumber" name="342" href="#342">342</a>           <span class="jxr_string">"or UniformSplit, which are built-in split algorithms. "</span> +
-<a class="jxr_linenumber" name="343" href="#343">343</a>           <span class="jxr_string">"HexStringSplit treats keys as hexadecimal ASCII, and "</span> +
-<a class="jxr_linenumber" name="344" href="#344">344</a>           <span class="jxr_string">"UniformSplit treats keys as arbitrary bytes."</span>, opt);
-<a class="jxr_linenumber" name="345" href="#345">345</a>       <strong class="jxr_keyword">return</strong>;
-<a class="jxr_linenumber" name="346" href="#346">346</a>     }
-<a class="jxr_linenumber" name="347" href="#347">347</a>     <a href="../../../../../org/apache/hadoop/hbase/TableName.html">TableName</a> tableName = TableName.valueOf(cmd.getArgs()[0]);
-<a class="jxr_linenumber" name="348" href="#348">348</a>     String splitClass = cmd.getArgs()[1];
-<a class="jxr_linenumber" name="349" href="#349">349</a>     <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> splitAlgo = newSplitAlgoInstance(conf, splitClass);
-<a class="jxr_linenumber" name="350" href="#350">350</a> 
-<a class="jxr_linenumber" name="351" href="#351">351</a>     <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"firstrow"</span>)) {
-<a class="jxr_linenumber" name="352" href="#352">352</a>       splitAlgo.setFirstRow(cmd.getOptionValue(<span class="jxr_string">"firstrow"</span>));
-<a class="jxr_linenumber" name="353" href="#353">353</a>     }
-<a class="jxr_linenumber" name="354" href="#354">354</a>     <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"lastrow"</span>)) {
-<a class="jxr_linenumber" name="355" href="#355">355</a>       splitAlgo.setLastRow(cmd.getOptionValue(<span class="jxr_string">"lastrow"</span>));
-<a class="jxr_linenumber" name="356" href="#356">356</a>     }
-<a class="jxr_linenumber" name="357" href="#357">357</a> 
-<a class="jxr_linenumber" name="358" href="#358">358</a>     <strong class="jxr_keyword">if</strong> (createTable) {
-<a class="jxr_linenumber" name="359" href="#359">359</a>       conf.set(<span class="jxr_string">"split.count"</span>, cmd.getOptionValue(<span class="jxr_string">"c"</span>));
-<a class="jxr_linenumber" name="360" href="#360">360</a>       createPresplitTable(tableName, splitAlgo, cmd.getOptionValue(<span class="jxr_string">"f"</span>).split(<span class="jxr_string">":"</span>), conf);
-<a class="jxr_linenumber" name="361" href="#361">361</a>     }
-<a class="jxr_linenumber" name="362" href="#362">362</a> 
-<a class="jxr_linenumber" name="363" href="#363">363</a>     <strong class="jxr_keyword">if</strong> (rollingSplit) {
-<a class="jxr_linenumber" name="364" href="#364">364</a>       <strong class="jxr_keyword">if</strong> (cmd.hasOption(<span class="jxr_string">"o"</span>)) {
-<a class="jxr_linenumber" name="365" href="#365">365</a>         conf.set(<span class="jxr_string">"split.outstanding"</span>, cmd.getOptionValue(<span class="jxr_string">"o"</span>));
-<a class="jxr_linenumber" name="366" href="#366">366</a>       }
-<a class="jxr_linenumber" name="367" href="#367">367</a>       rollingSplit(tableName, splitAlgo, conf);
-<a class="jxr_linenumber" name="368" href="#368">368</a>     }
-<a class="jxr_linenumber" name="369" href="#369">369</a>   }
-<a class="jxr_linenumber" name="370" href="#370">370</a> 
-<a class="jxr_linenumber" name="371" href="#371">371</a>   <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">void</strong> createPresplitTable(<a href="../../../../../org/apache/hadoop/hbase/TableName.html">TableName</a> tableName, <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> splitAlgo,
-<a class="jxr_linenumber" name="372" href="#372">372</a>           String[] columnFamilies, Configuration conf)
-<a class="jxr_linenumber" name="373" href="#373">373</a>   <strong class="jxr_keyword">throws</strong> IOException, InterruptedException {
-<a class="jxr_linenumber" name="374" href="#374">374</a>     <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> splitCount = conf.getInt(<span class="jxr_string">"split.count"</span>, 0);
-<a class="jxr_linenumber" name="375" href="#375">375</a>     Preconditions.checkArgument(splitCount &gt; 1, <span class="jxr_string">"Split count must be &gt; 1"</span>);
-<a class="jxr_linenumber" name="376" href="#376">376</a> 
-<a class="jxr_linenumber" name="377" href="#377">377</a>     Preconditions.checkArgument(columnFamilies.length &gt; 0,
-<a class="jxr_linenumber" name="378" href="#378">378</a>         <span class="jxr_string">"Must specify at least one column family. "</span>);
-<a class="jxr_linenumber" name="379" href="#379">379</a>     LOG.debug(<span class="jxr_string">"Creating table "</span> + tableName + <span class="jxr_string">" with "</span> + columnFamilies.length
-<a class="jxr_linenumber" name="380" href="#380">380</a>         + <span class="jxr_string">" column families.  Presplitting to "</span> + splitCount + <span class="jxr_string">" regions"</span>);
-<a class="jxr_linenumber" name="381" href="#381">381</a> 
-<a class="jxr_linenumber" name="382" href="#382">382</a>     <a href="../../../../../org/apache/hadoop/hbase/HTableDescriptor.html">HTableDescriptor</a> desc = <strong class="jxr_keyword">new</strong> <a href="../../../../../org/apache/hadoop/hbase/HTableDescriptor.html">HTableDescriptor</a>(tableName);
-<a class="jxr_linenumber" name="383" href="#383">383</a>     <strong class="jxr_keyword">for</strong> (String cf : columnFamilies) {
-<a class="jxr_linenumber" name="384" href="#384">384</a>       desc.addFamily(<strong class="jxr_keyword">new</strong> <a href="../../../../../org/apache/hadoop/hbase/HColumnDescriptor.html">HColumnDescriptor</a>(Bytes.toBytes(cf)));
-<a class="jxr_linenumber" name="385" href="#385">385</a>     }
-<a class="jxr_linenumber" name="386" href="#386">386</a>     <strong class="jxr_keyword">try</strong> (Connection connection = ConnectionFactory.createConnection(conf)) {
-<a class="jxr_linenumber" name="387" href="#387">387</a>       <a href="../../../../../org/apache/hadoop/hbase/client/Admin.html">Admin</a> admin = connection.getAdmin();
-<a class="jxr_linenumber" name="388" href="#388">388</a>       <strong class="jxr_keyword">try</strong> {
-<a class="jxr_linenumber" name="389" href="#389">389</a>         Preconditions.checkArgument(!admin.tableExists(tableName),
-<a class="jxr_linenumber" name="390" href="#390">390</a>           <span class="jxr_string">"Table already exists: "</span> + tableName);
-<a class="jxr_linenumber" name="391" href="#391">391</a>         admin.createTable(desc, splitAlgo.split(splitCount));
-<a class="jxr_linenumber" name="392" href="#392">392</a>       } <strong class="jxr_keyword">finally</strong> {
-<a class="jxr_linenumber" name="393" href="#393">393</a>         admin.close();
-<a class="jxr_linenumber" name="394" href="#394">394</a>       }
-<a class="jxr_linenumber" name="395" href="#395">395</a>       LOG.debug(<span class="jxr_string">"Table created!  Waiting for regions to show online in META..."</span>);
-<a class="jxr_linenumber" name="396" href="#396">396</a>       <strong class="jxr_keyword">if</strong> (!conf.getBoolean(<span class="jxr_string">"split.verify"</span>, <strong class="jxr_keyword">true</strong>)) {
-<a class="jxr_linenumber" name="397" href="#397">397</a>         <em class="jxr_comment">// NOTE: createTable is synchronous on the table, but not on the regions</em>
-<a class="jxr_linenumber" name="398" href="#398">398</a>         <strong class="jxr_keyword">int</strong> onlineRegions = 0;
-<a class="jxr_linenumber" name="399" href="#399">399</a>         <strong class="jxr_keyword">while</strong> (onlineRegions &lt; splitCount) {
-<a class="jxr_linenumber" name="400" href="#400">400</a>           onlineRegions = MetaTableAccessor.getRegionCount(connection, tableName);
-<a class="jxr_linenumber" name="401" href="#401">401</a>           LOG.debug(onlineRegions + <span class="jxr_string">" of "</span> + splitCount + <span class="jxr_string">" regions online..."</span>);
-<a class="jxr_linenumber" name="402" href="#402">402</a>           <strong class="jxr_keyword">if</strong> (onlineRegions &lt; splitCount) {
-<a class="jxr_linenumber" name="403" href="#403">403</a>             Thread.sleep(10 * 1000); <em class="jxr_comment">// sleep</em>
-<a class="jxr_linenumber" name="404" href="#404">404</a>           }
-<a class="jxr_linenumber" name="405" href="#405">405</a>         }
-<a class="jxr_linenumber" name="406" href="#406">406</a>       }
-<a class="jxr_linenumber" name="407" href="#407">407</a>       LOG.debug(<span class="jxr_string">"Finished creating table with "</span> + splitCount + <span class="jxr_string">" regions"</span>);
-<a class="jxr_linenumber" name="408" href="#408">408</a>     }
-<a class="jxr_linenumber" name="409" href="#409">409</a>   }
-<a class="jxr_linenumber" name="410" href="#410">410</a> 
-<a class="jxr_linenumber" name="411" href="#411">411</a>   <em class="jxr_javadoccomment">/**</em>
-<a class="jxr_linenumber" name="412" href="#412">412</a> <em class="jxr_javadoccomment">   * Alternative getCurrentNrHRS which is no longer available.</em>
-<a class="jxr_linenumber" name="413" href="#413">413</a> <em class="jxr_javadoccomment">   * @param connection</em>
-<a class="jxr_linenumber" name="414" href="#414">414</a> <em class="jxr_javadoccomment">   * @return Rough count of regionservers out on cluster.</em>
-<a class="jxr_linenumber" name="415" href="#415">415</a> <em class="jxr_javadoccomment">   * @throws IOException </em>
-<a class="jxr_linenumber" name="416" href="#416">416</a> <em class="jxr_javadoccomment">   */</em>
-<a class="jxr_linenumber" name="417" href="#417">417</a>   <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">int</strong> getRegionServerCount(<strong class="jxr_keyword">final</strong> <a href="../../../../../org/apache/hadoop/hbase/client/Connection.html">Connection</a> connection) <strong class="jxr_keyword">throws</strong> IOException {
-<a class="jxr_linenumber" name="418" href="#418">418</a>     <strong class="jxr_keyword">try</strong> (Admin admin = connection.getAdmin()) {
-<a class="jxr_linenumber" name="419" href="#419">419</a>       <a href="../../../../../org/apache/hadoop/hbase/ClusterStatus.html">ClusterStatus</a> status = admin.getClusterStatus();
-<a class="jxr_linenumber" name="420" href="#420">420</a>       Collection&lt;ServerName&gt; servers = status.getServers();
-<a class="jxr_linenumber" name="421" href="#421">421</a>       <strong class="jxr_keyword">return</strong> servers == <strong class="jxr_keyword">null</strong> || servers.isEmpty()? 0: servers.size();
-<a class="jxr_linenumber" name="422" href="#422">422</a>     }
-<a class="jxr_linenumber" name="423" href="#423">423</a>   }
-<a class="jxr_linenumber" name="424" href="#424">424</a> 
-<a class="jxr_linenumber" name="425" href="#425">425</a>   <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> byte [] readFile(<strong class="jxr_keyword">final</strong> FileSystem fs, <strong class="jxr_keyword">final</strong> Path path) <strong class="jxr_keyword">throws</strong> IOException {
-<a class="jxr_linenumber" name="426" href="#426">426</a>     FSDataInputStream tmpIn = fs.open(path);
-<a class="jxr_linenumber" name="427" href="#427">427</a>     <strong class="jxr_keyword">try</strong> {
-<a class="jxr_linenumber" name="428" href="#428">428</a>       byte [] rawData = <strong class="jxr_keyword">new</strong> byte[tmpIn.available()];
-<a class="jxr_linenumber" name="429" href="#429">429</a>       tmpIn.readFully(rawData);
-<a class="jxr_linenumber" name="430" href="#430">430</a>       <strong class="jxr_keyword">return</strong> rawData;
-<a class="jxr_linenumber" name="431" href="#431">431</a>     } <strong class="jxr_keyword">finally</strong> {
-<a class="jxr_linenumber" name="432" href="#432">432</a>       tmpIn.close();
-<a class="jxr_linenumber" name="433" href="#433">433</a>     }
-<a class="jxr_linenumber" name="434" href="#434">434</a>   }
-<a class="jxr_linenumber" name="435" href="#435">435</a> 
-<a class="jxr_linenumber" name="436" href="#436">436</a>   <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">void</strong> rollingSplit(<a href="../../../../../org/apache/hadoop/hbase/TableName.html">TableName</a> tableName, <a href="../../../../../org/apache/hadoop/hbase/util/RegionSplitter.html">SplitAlgorithm</a> splitAlgo, Configuration conf)
-<a class="jxr_linenumber" name="437" href="#437">437</a>   <strong class="jxr_keyword">throws</strong> IOException, InterruptedException {
-<a class="jxr_linenumber" name="438" href="#438">438</a>     <strong class="jxr_keyword">final</strong> <strong class="jxr_keyword">int</strong> minOS = conf.getInt(<span class="jxr_string">"split.outstanding"</span>, 2);
-<a class="jxr_linenumber" name="439" href="#439">439</a>     <strong cla

<TRUNCATED>

Mime
View raw message