hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r834188 - in /hadoop/hbase/branches/0.20: CHANGES.txt src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
Date Mon, 09 Nov 2009 19:41:52 GMT
Author: stack
Date: Mon Nov  9 19:41:51 2009
New Revision: 834188

URL: http://svn.apache.org/viewvc?rev=834188&view=rev
Log:
HBASE-1829 Make use of start/stop row in TableInputFormat

Modified:
    hadoop/hbase/branches/0.20/CHANGES.txt
    hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java

Modified: hadoop/hbase/branches/0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/CHANGES.txt?rev=834188&r1=834187&r2=834188&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.20/CHANGES.txt Mon Nov  9 19:41:51 2009
@@ -50,6 +50,7 @@
    HBASE-1947  If HBase starts/stops often in less than 24 hours, 
                you end up with lots of store files
    HBASE-1867  Tool to regenerate an hbase table from the data files
+   HBASE-1829  Make use of start/stop row in TableInputFormat
 
 Release 0.20.1 - Released October 12th, 2009
   INCOMPATIBLE CHANGES

Modified: hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java?rev=834188&r1=834187&r2=834188&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
(original)
+++ hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
Mon Nov  9 19:41:51 2009
@@ -23,6 +23,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.ArrayList;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -33,11 +34,14 @@
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -271,29 +275,40 @@
    */
   @Override
   public List<InputSplit> getSplits(JobContext context) throws IOException {
+    Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
+    if (keys == null || keys.getFirst() == null || 
+        keys.getFirst().length == 0) {
+      throw new IOException("Expecting at least one region.");
+    }
     if (table == null) {
       throw new IOException("No table was provided.");
     }
-    byte [][] startKeys = table.getStartKeys();
-    if (startKeys == null || startKeys.length == 0) {
-      throw new IOException("Expecting at least one region.");
-    }
-    int realNumSplits = startKeys.length;
-    InputSplit[] splits = new InputSplit[realNumSplits];
-    int middle = startKeys.length / realNumSplits;
-    int startPos = 0;
-    for (int i = 0; i < realNumSplits; i++) {
-      int lastPos = startPos + middle;
-      lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
-      String regionLocation = table.getRegionLocation(startKeys[startPos]).
-        getServerAddress().getHostname(); 
-      splits[i] = new TableSplit(this.table.getTableName(),
-        startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
-          HConstants.EMPTY_START_ROW, regionLocation);
-      LOG.info("split: " + i + "->" + splits[i]);
-      startPos = lastPos;
+    int count = 0;
+    List<InputSplit> splits = new ArrayList<InputSplit>(keys.getFirst().length);

+    for (int i = 0; i < keys.getFirst().length; i++) {
+      String regionLocation = table.getRegionLocation(keys.getFirst()[i]).
+        getServerAddress().getHostname();
+      byte[] startRow = scan.getStartRow();
+      byte[] stopRow = scan.getStopRow();
+      // determine if the given start and stop keys fall into the region
+      if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
+           Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
+          (stopRow.length == 0 || 
+           Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
+        byte[] splitStart = startRow.length == 0 || 
+          Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ? 
+            keys.getFirst()[i] : startRow;
+        byte[] splitStop = stopRow.length == 0 || 
+          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0 ? 
+            keys.getSecond()[i] : stopRow;
+        InputSplit split = new TableSplit(table.getTableName(),
+          splitStart, splitStop, regionLocation);
+        splits.add(split);
+        if (LOG.isDebugEnabled()) 
+          LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
+      }
     }
-    return Arrays.asList(splits);
+    return splits;
   }
 
   /**



Mime
View raw message