hbase-commits mailing list archives

From: st...@apache.org
Subject: svn commit: r1082267 - in /hbase/branches/0.90: CHANGES.txt bin/loadtable.rb src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
Date: Wed, 16 Mar 2011 19:14:52 GMT
Author: stack
Date: Wed Mar 16 19:14:51 2011
New Revision: 1082267

URL: http://svn.apache.org/viewvc?rev=1082267&view=rev
Log:
HBASE-3440 Clean out load_table.rb and make sure all roads lead to completebulkload tool --
REVERTED

Modified:
    hbase/branches/0.90/CHANGES.txt
    hbase/branches/0.90/bin/loadtable.rb
    hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
    hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1082267&r1=1082266&r2=1082267&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Wed Mar 16 19:14:51 2011
@@ -52,8 +52,6 @@ Release 0.90.2 - February 9th, 2011
    HBASE-3496  HFile CLI Improvements
    HBASE-3596  [replication] Wait a few seconds before transferring queues
    HBASE-3600  Update our jruby to 1.6.0
-   HBASE-3440  Clean out load_table.rb and make sure all roads lead to
-               completebulkload tool (Vidhyashankar Venkataraman via Stack)
    HBASE-3653  Parallelize Server Requests on HBase Client
 
 

Modified: hbase/branches/0.90/bin/loadtable.rb
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/bin/loadtable.rb?rev=1082267&r1=1082266&r2=1082267&view=diff
==============================================================================
--- hbase/branches/0.90/bin/loadtable.rb (original)
+++ hbase/branches/0.90/bin/loadtable.rb Wed Mar 16 19:14:51 2011
@@ -17,4 +17,134 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-puts 'DISABLED!!!! Use completebulkload instead.  See tail of http://hbase.apache.org/bulk-loads.html'
+# Script that takes over from org.apache.hadoop.hbase.mapreduce.HFileOutputFormat.
+# Pass it the output directory of HFileOutputFormat. It will read the passed files,
+# move them into place, and update the catalog table appropriately.  Warning:
+# it will overwrite anything that already exists for the passed table.
+# It expects hbase to be up and running so it can insert table info.
+#
+# To see usage for this script, run: 
+#
+#  ${HBASE_HOME}/bin/hbase org.jruby.Main loadtable.rb
+#
+include Java
+import java.util.TreeMap
+import org.apache.hadoop.hbase.client.HTable
+import org.apache.hadoop.hbase.client.Put
+import org.apache.hadoop.hbase.util.FSUtils
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.util.Writables
+import org.apache.hadoop.hbase.HConstants
+import org.apache.hadoop.hbase.HBaseConfiguration
+import org.apache.hadoop.hbase.HRegionInfo
+import org.apache.hadoop.hbase.HTableDescriptor
+import org.apache.hadoop.hbase.HColumnDescriptor
+import org.apache.hadoop.hbase.HRegionInfo
+import org.apache.hadoop.hbase.io.hfile.HFile
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.fs.FileSystem
+import org.apache.hadoop.mapred.OutputLogFilter
+import org.apache.commons.logging.Log
+import org.apache.commons.logging.LogFactory
+
+# Name of this script
+NAME = "loadtable"
+
+# Print usage for this script
+def usage
+  puts 'Usage: %s.rb TABLENAME HFILEOUTPUTFORMAT_OUTPUT_DIR' % NAME
+  exit!
+end
+
+# Raise unless the passed 'dir' exists and is a directory
+def isDirExists(fs, dir)
+  raise IOError.new("Does not exist: " + dir.toString()) unless fs.exists(dir)
+  raise IOError.new("Not a directory: " + dir.toString()) unless fs.isDirectory(dir)
+end
+
+# Check arguments
+if ARGV.size != 2
+  usage
+end
+
+# Check good table names were passed.
+tableName = HTableDescriptor.isLegalTableName(ARGV[0].to_java_bytes)
+outputdir = Path.new(ARGV[1])
+
+# Get configuration to use.
+c = HBaseConfiguration.new()
+# Get a logger and a metautils instance.
+LOG = LogFactory.getLog(NAME)
+
+# Set hadoop filesystem configuration using the hbase.rootdir.
+# Otherwise, we'll always use localhost though the hbase.rootdir
+# might be pointing at hdfs location.
+c.set("fs.defaultFS", c.get(HConstants::HBASE_DIR))
+fs = FileSystem.get(c)
+
+# If hfiles directory does not exist, exit.
+isDirExists(fs, outputdir)
+# Create table dir if it doesn't exist.
+rootdir = FSUtils.getRootDir(c)
+tableDir = Path.new(rootdir, Path.new(Bytes.toString(tableName)))
+fs.mkdirs(tableDir) unless fs.exists(tableDir)
+
+# Start. Per hfile, move it, and insert an entry in catalog table.
+families = fs.listStatus(outputdir, OutputLogFilter.new())
+raise IOError.new("Can do one family only") if families.length > 1
+# Read meta on all files. Put in map keyed by start key.
+map = TreeMap.new(Bytes::ByteArrayComparator.new())
+family = families[0]
+# Make sure this subdir exists under table
+hfiles = fs.listStatus(family.getPath())
+LOG.info("Found " + hfiles.length.to_s + " hfiles");
+count = 0
+for hfile in hfiles
+  reader = HFile::Reader.new(fs, hfile.getPath(), nil, false)
+  begin
+    fileinfo = reader.loadFileInfo() 
+    firstkey = reader.getFirstKey()
+    # First key is row/column/ts.  We just want the row part.
+    rowlen = Bytes.toShort(firstkey)
+    firstkeyrow = firstkey[2, rowlen] 
+    LOG.info(count.to_s + " read firstkey of " +
+      Bytes.toString(firstkeyrow) + " from " + hfile.getPath().toString())
+    map.put(firstkeyrow, [hfile, fileinfo])
+    count = count + 1
+  ensure
+    reader.close()
+  end
+end
+# Now I have sorted list of fileinfo+paths.  Start insert.
+# Get a client on catalog table.
+meta = HTable.new(c, HConstants::META_TABLE_NAME)
+# I can't find out from hfile how its compressed.
+# Using all defaults. Change manually after loading if
+# something else wanted in column or table attributes.
+familyName = family.getPath().getName()
+hcd = HColumnDescriptor.new(familyName)
+htd = HTableDescriptor.new(tableName)
+htd.addFamily(hcd)
+previouslastkey = HConstants::EMPTY_START_ROW
+count = map.size()
+for i in map.descendingKeySet().iterator()
+  tuple = map.get(i)
+  startkey = i
+  count = count - 1
+  # If last time through loop, set start row as EMPTY_START_ROW
+  startkey = HConstants::EMPTY_START_ROW unless count > 0
+  # Next time around, lastkey is this startkey
+  hri = HRegionInfo.new(htd, startkey, previouslastkey)  
+  previouslastkey = startkey 
+  LOG.info(hri.toString())
+  hfile = tuple[0].getPath()
+  rdir = Path.new(Path.new(tableDir, hri.getEncodedName().to_s), familyName)
+  fs.mkdirs(rdir)
+  tgt = Path.new(rdir, hfile.getName())
+  fs.rename(hfile, tgt)
+  LOG.info("Moved " + hfile.toString() + " to " + tgt.toString())
+  p = Put.new(hri.getRegionName())
+  p.add(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER, Writables.getBytes(hri))
+  meta.put(p)
+  LOG.info("Inserted " + hri.toString())
+end
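
For context (not part of the commit): the restored script runs under the JRuby
runtime bundled with HBase, taking a table name and the HFileOutputFormat
output directory as its two arguments. A hypothetical invocation, with
"mytable" and the output path as placeholders:

    ${HBASE_HOME}/bin/hbase org.jruby.Main bin/loadtable.rb mytable /user/hadoop/hfof-output

The script handles exactly one column family per run and, as its header
comment warns, overwrites anything already in place for the passed table.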

Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java?rev=1082267&r1=1082266&r2=1082267&view=diff
==============================================================================
--- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java (original)
+++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java Wed Mar 16 19:14:51 2011
@@ -24,7 +24,6 @@ import java.io.IOException;
 import java.util.Deque;
 import java.util.LinkedList;
 import java.util.Map;
-import java.util.ArrayList;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -52,11 +51,6 @@ import org.apache.hadoop.hbase.regionser
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import java.util.TreeMap;
-
 
 /**
  * Tool to load the output of HFileOutputFormat into an existing table.
@@ -64,21 +58,14 @@ import java.util.TreeMap;
  */
 public class LoadIncrementalHFiles extends Configured implements Tool {
 
-  private static Log LOG = LogFactory.getLog(LoadIncrementalHFiles.class);
-  private static final int  TABLE_CREATE_MAX_RETRIES = 20;
-  private static final long TABLE_CREATE_SLEEP = 60000;
-  private HBaseAdmin hbAdmin;
+  static Log LOG = LogFactory.getLog(LoadIncrementalHFiles.class);
 
   public static String NAME = "completebulkload";
 
-  public LoadIncrementalHFiles(Configuration conf) throws Exception {
+  public LoadIncrementalHFiles(Configuration conf) {
     super(conf);
-    this.hbAdmin = new HBaseAdmin(conf);
   }
 
-  /* This constructor does not add HBase configuration. 
-   * Explicit addition is necessary. Do we need this constructor? 
-   */
   public LoadIncrementalHFiles() {
     super();
   }
@@ -312,125 +299,6 @@ public class LoadIncrementalHFiles exten
     return !HFile.isReservedFileInfoKey(key);
   }
 
-  private boolean doesTableExist(String tableName) throws Exception {
-    return hbAdmin.tableExists(tableName);
-  }
-  
-  /*
-   * Infers region boundaries for a new table.
-   * Parameter:
-   *   bdryMap is a map between keys to an integer belonging to {+1, -1}
-   *     If a key is a start key of a file, then it maps to +1
-   *     If a key is an end key of a file, then it maps to -1
-   * Algo:
-   * 1) Poll on the keys in order: 
-   *    a) Keep adding the mapped values to these keys (runningSum) 
-   *    b) Each time runningSum reaches 0, add the start Key from when the runningSum had started to a boundary list.
-   * 2) Return the boundary list. 
-   */
-  public static byte[][] inferBoundaries(TreeMap<byte[], Integer> bdryMap) {
-    ArrayList<byte[]> keysArray = new ArrayList<byte[]>();
-    int runningValue = 0;
-    byte[] currStartKey = null;
-    boolean firstBoundary = true;
-    
-    for (Map.Entry<byte[], Integer> item: bdryMap.entrySet()) {
-      if (runningValue == 0) currStartKey = item.getKey();
-      runningValue += item.getValue();
-      if (runningValue == 0) {
-        if (!firstBoundary) keysArray.add(currStartKey);
-        firstBoundary = false;
-      } 
-    }
-    
-    return keysArray.toArray(new byte[0][0]);
-  }
- 
-  /*
-   * If the table is created for the first time, then "completebulkload" reads the files twice.
-   * More modifications necessary if we want to avoid doing it.
-   */
-  private void createTable(String tableName, String dirPath) throws Exception {
-    Path hfofDir = new Path(dirPath);
-    FileSystem fs = hfofDir.getFileSystem(getConf());
-
-    if (!fs.exists(hfofDir)) {
-      throw new FileNotFoundException("HFileOutputFormat dir " +
-          hfofDir + " not found");
-    }
-
-    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
-    if (familyDirStatuses == null) {
-      throw new FileNotFoundException("No families found in " + hfofDir);
-    }
-
-    HTableDescriptor htd = new HTableDescriptor(tableName);
-    HColumnDescriptor hcd = null;
-
-    // Add column families
-    // Build a set of keys
-    byte[][] keys = null;
-    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
-    
-    for (FileStatus stat : familyDirStatuses) {
-      if (!stat.isDir()) {
-        LOG.warn("Skipping non-directory " + stat.getPath());
-        continue;
-      }
-      Path familyDir = stat.getPath();
-      // Skip _logs, etc
-      if (familyDir.getName().startsWith("_")) continue;
-      byte[] family = familyDir.getName().getBytes();
-     
-      hcd = new HColumnDescriptor(family);
-      htd.addFamily(hcd);
-      
-      Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
-      for (Path hfile : hfiles) {
-        if (hfile.getName().startsWith("_")) continue;
-        
-        HFile.Reader reader = new HFile.Reader(fs, hfile, null, false, false);
-        final byte[] first, last;
-        try {
-          reader.loadFileInfo();
-          first = reader.getFirstRowKey();
-          last =  reader.getLastRowKey();
-
-          LOG.info("Trying to figure out region boundaries hfile=" + hfile +
-            " first=" + Bytes.toStringBinary(first) +
-            " last="  + Bytes.toStringBinary(last));
-          
-          // To eventually infer start key-end key boundaries
-          Integer value = map.containsKey(first)?(Integer)map.get(first):0;
-          map.put(first, value+1);
-
-          value = map.containsKey(last)?(Integer)map.get(last):0;
-          map.put(last, value-1);
-        }  finally {
-          reader.close();
-        }
-      }
-    }
-    
-    keys = LoadIncrementalHFiles.inferBoundaries(map);
-    try {    
-      this.hbAdmin.createTableAsync(htd, keys);
-    } catch (java.net.SocketTimeoutException e) {
-      System.err.println("Caught Socket timeout.. Mostly caused by a slow region assignment
by master!");
-    }
-         
-    HTable table = new HTable(tableName);
-
-    HConnection conn = table.getConnection();
-    int ctr = 0;
-    while (!conn.isTableAvailable(table.getTableName()) && (ctr<TABLE_CREATE_MAX_RETRIES)) {
-      LOG.info("Table " + tableName + "not yet available... Sleeping for 60 more seconds...");
-      /* Every TABLE_CREATE_SLEEP milliseconds, wakes up and checks if the table is available*/
-      Thread.sleep(TABLE_CREATE_SLEEP);
-      ctr++;
-    }
-    LOG.info("Table "+ tableName +" is finally available!!");
-  }
 
   @Override
   public int run(String[] args) throws Exception {
@@ -439,21 +307,15 @@ public class LoadIncrementalHFiles exten
       return -1;
     }
 
-    String dirPath   = args[0];
-    String tableName = args[1];
-    
-    boolean tableExists   = this.doesTableExist(tableName);
-    if (!tableExists) this.createTable(tableName,dirPath);
-    
-    Path hfofDir = new Path(dirPath);
-    HTable table = new HTable(tableName);
+    Path hfofDir = new Path(args[0]);
+    HTable table = new HTable(this.getConf(), args[1]);
 
     doBulkLoad(hfofDir, table);
     return 0;
   }
 
   public static void main(String[] args) throws Exception {
-    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args);
+    ToolRunner.run(new LoadIncrementalHFiles(), args);
   }
 
 }
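
For context (not part of the commit): with table auto-creation reverted out of
LoadIncrementalHFiles, completebulkload again assumes the target table exists
before the load. A minimal pre-creation sketch against the 0.90 client API;
the table name "mytable", family "f", and split keys below are placeholders
and should mirror the region boundaries the HFileOutputFormat job produced:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.client.HBaseAdmin;
    import org.apache.hadoop.hbase.util.Bytes;

    public class PreCreateTable {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("mytable")) {            // placeholder table name
          HTableDescriptor htd = new HTableDescriptor("mytable");
          htd.addFamily(new HColumnDescriptor("f"));    // placeholder family
          // Placeholder split keys: one fewer than the number of regions wanted.
          byte[][] splits = { Bytes.toBytes("m"), Bytes.toBytes("r") };
          admin.createTable(htd, splits);
        }
      }
    }

Once the table exists, the tool is run with the HFileOutputFormat output
directory and the table name as its two arguments, per run() above.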

Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java?rev=1082267&r1=1082266&r2=1082267&view=diff
==============================================================================
--- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java (original)
+++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java Wed Mar 16 19:14:51 2011
@@ -22,8 +22,6 @@ package org.apache.hadoop.hbase.mapreduc
 import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -188,70 +186,4 @@ public class TestLoadIncrementalHFiles {
       writer.close();
     }
   }
-
-  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
-    Integer value = map.containsKey(first)?(Integer)map.get(first):0;
-    map.put(first, value+1);
-
-    value = map.containsKey(last)?(Integer)map.get(last):0;
-    map.put(last, value-1);
-  }
-
-  @Test 
-  public void testInferBoundaries() {
-    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
-
-    /* Toy example
-     *     c---------i            o------p          s---------t     v------x
-     * a------e    g-----k   m-------------q   r----s            u----w
-     *
-     * Should be inferred as:
-     * a-----------------k   m-------------q   r--------------t  u---------x
-     * 
-     * The output should be (m,r,u) 
-     */
-
-    String first;
-    String last;
-
-    first = "a"; last = "e";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-    
-    first = "r"; last = "s";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-
-    first = "o"; last = "p";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-
-    first = "g"; last = "k";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-
-    first = "v"; last = "x";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-
-    first = "c"; last = "i";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-
-    first = "m"; last = "q";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-
-    first = "s"; last = "t";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-    
-    first = "u"; last = "w";
-    addStartEndKeysForTest(map, first.getBytes(), last.getBytes());
-
-    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
-    byte[][] compare = new byte[3][];
-    compare[0] = "m".getBytes();
-    compare[1] = "r".getBytes(); 
-    compare[2] = "u".getBytes();
-
-    assertEquals(keysArray.length, 3);
-
-    for (int row = 0; row<keysArray.length; row++){
-      assertArrayEquals(keysArray[row], compare[row]);
-    }
-  }
-
 }
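
For context (not part of the commit): the deleted testInferBoundaries exercised
the sweep described in the removed inferBoundaries comment above: a file's first
row key maps to +1 and its last to -1, and each time the running sum returns to
zero a run of overlapping files closes, whose start key (except the first run's)
becomes a region boundary. A self-contained sketch of the same sweep over the
test's toy intervals, with a plain lexicographic comparator standing in for
Bytes.BYTES_COMPARATOR:

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.Map;
    import java.util.TreeMap;

    public class InferBoundariesSketch {
      // Lexicographic byte[] comparator, a stand-in for Bytes.BYTES_COMPARATOR.
      static final Comparator<byte[]> LEX = new Comparator<byte[]>() {
        public int compare(byte[] a, byte[] b) {
          int n = Math.min(a.length, b.length);
          for (int i = 0; i < n; i++) {
            int d = (a[i] & 0xff) - (b[i] & 0xff);
            if (d != 0) return d;
          }
          return a.length - b.length;
        }
      };

      // Same sweep as the removed inferBoundaries().
      static byte[][] inferBoundaries(TreeMap<byte[], Integer> bdryMap) {
        ArrayList<byte[]> keysArray = new ArrayList<byte[]>();
        int runningValue = 0;
        byte[] currStartKey = null;
        boolean firstBoundary = true;
        for (Map.Entry<byte[], Integer> item : bdryMap.entrySet()) {
          if (runningValue == 0) currStartKey = item.getKey();
          runningValue += item.getValue();
          if (runningValue == 0) {
            if (!firstBoundary) keysArray.add(currStartKey);
            firstBoundary = false;
          }
        }
        return keysArray.toArray(new byte[0][0]);
      }

      // +1 at a file's first row key, -1 at its last.
      static void addStartEnd(TreeMap<byte[], Integer> map, String first, String last) {
        byte[] f = first.getBytes(), l = last.getBytes();
        Integer v = map.get(f);
        map.put(f, (v == null ? 0 : v) + 1);
        v = map.get(l);
        map.put(l, (v == null ? 0 : v) - 1);
      }

      public static void main(String[] args) {
        TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(LEX);
        // The deleted test's toy intervals.
        String[][] ivs = { {"a","e"}, {"c","i"}, {"g","k"}, {"m","q"}, {"o","p"},
                           {"r","s"}, {"s","t"}, {"u","w"}, {"v","x"} };
        for (String[] iv : ivs) addStartEnd(map, iv[0], iv[1]);
        for (byte[] b : inferBoundaries(map)) System.out.println(new String(b));
      }
    }

Run, it prints the three inferred boundaries m, r, and u, matching the deleted
test's expected output.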


