hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject hbase git commit: HBASE-12223 MultiTableInputFormatBase.getSplits is too slow (Yuanbo Peng)
Date Thu, 18 Dec 2014 13:15:32 GMT
Repository: hbase
Updated Branches:
  refs/heads/0.94 a9114543c -> 20c83b53c


HBASE-12223 MultiTableInputFormatBase.getSplits is too slow (Yuanbo Peng)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/20c83b53
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/20c83b53
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/20c83b53

Branch: refs/heads/0.94
Commit: 20c83b53cac6d45966f8a2e8b7f596a5b4639adb
Parents: a911454
Author: tedyu <yuzhihong@gmail.com>
Authored: Thu Dec 18 05:15:24 2014 -0800
Committer: tedyu <yuzhihong@gmail.com>
Committed: Thu Dec 18 05:15:24 2014 -0800

----------------------------------------------------------------------
 .../mapreduce/MultiTableInputFormatBase.java    | 97 ++++++++++++--------
 1 file changed, 59 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/20c83b53/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
index 76a1632..7160eac 100644
--- a/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
@@ -37,6 +37,9 @@ import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
 /**
  * A base for {@link MultiTableInputFormat}s. Receives a list of
  * {@link Scan} instances that define the input tables and
@@ -105,59 +108,77 @@ public abstract class MultiTableInputFormatBase extends
    * @throws IOException When creating the list of splits fails.
    * @see org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)
    */
+
+
   @Override
   public List<InputSplit> getSplits(JobContext context) throws IOException {
     if (scans.isEmpty()) {
       throw new IOException("No scans were provided.");
     }
-    List<InputSplit> splits = new ArrayList<InputSplit>();
 
+    Map<String, List<Scan>> tableMaps = new HashMap<String, List<Scan>>();
     for (Scan scan : scans) {
       byte[] tableName = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
-      if (tableName == null) 
+      if (tableName == null)
         throw new IOException("A scan object did not have a table name");
-      HTable table = new HTable(context.getConfiguration(), tableName);
-      Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
-      if (keys == null || keys.getFirst() == null ||
-          keys.getFirst().length == 0) {
-        throw new IOException("Expecting at least one region for table : "
-            + Bytes.toString(tableName));
+      String tableNameStr = Bytes.toString(tableName);
+
+      List<Scan> scanList = tableMaps.get(tableNameStr);
+
+      if (scanList == null) {
+        scanList = new ArrayList<Scan>();
+        tableMaps.put(tableNameStr, scanList);
       }
-      int count = 0;
-      
-      byte[] startRow = scan.getStartRow();
-      byte[] stopRow = scan.getStopRow();
-
-      for (int i = 0; i < keys.getFirst().length; i++) {
-        if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
-          continue;
+      scanList.add(scan);
+    }
+
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+    Iterator iter = tableMaps.entrySet().iterator();
+    while (iter.hasNext()) {
+      Map.Entry<String, List<Scan>> entry = (Map.Entry<String, List<Scan>>) iter.next();
+      String tableNameStr = entry.getKey();
+      List<Scan> scanList = entry.getValue();
+      HTable table = new HTable(context.getConfiguration(), tableNameStr);
+      Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
+      for (Scan scan : scanList) {
+        if (keys == null || keys.getFirst() == null ||
+                keys.getFirst().length == 0) {
+          throw new IOException("Expecting at least one region for table : "
+                  + tableNameStr);
         }
-        String regionLocation =
-            table.getRegionLocation(keys.getFirst()[i], false).getHostname();
-        
-        // determine if the given start and stop keys fall into the range
-        if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
-            Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
-            (stopRow.length == 0 ||
-                Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
-          byte[] splitStart =
-              startRow.length == 0 ||
-                  Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ? keys
-                  .getFirst()[i] : startRow;
-          byte[] splitStop =
-              (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
-                  stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
-                  .getSecond()[i] : stopRow;
-          InputSplit split =
-              new TableSplit(tableName, scan, splitStart,
-                  splitStop, regionLocation);
-          splits.add(split);
-          if (LOG.isDebugEnabled())
-            LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
+        int count = 0;
+        byte[] startRow = scan.getStartRow();
+        byte[] stopRow = scan.getStopRow();
+        for (int i = 0; i < keys.getFirst().length; i++) {
+          if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
+            continue;
+          }
+
+          // determine if the given start and stop keys fall into the range
+          if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
+                  Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
+                  (stopRow.length == 0 || Bytes.compareTo(stopRow,
+                  keys.getFirst()[i]) > 0)) {
+            byte[] splitStart = startRow.length == 0 ||
+                    Bytes.compareTo(keys.getFirst()[i],
+                    startRow) >= 0 ? keys.getFirst()[i] : startRow;
+            byte[] splitStop = (stopRow.length == 0 ||
+                    Bytes.compareTo(keys.getSecond()[i],
+                    stopRow) <= 0) && keys.getSecond()[i].length > 0 ?
+                    keys.getSecond()[i] : stopRow;
+            String regionLocation = table.getRegionLocation(
+                    keys.getFirst()[i], false).getHostname();
+            InputSplit split = new TableSplit(Bytes.toBytes(tableNameStr), scan,
+                    splitStart, splitStop, regionLocation);
+            splits.add(split);
+            if (LOG.isDebugEnabled())
+              LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
+          }
         }
       }
       table.close();
     }
+
     return splits;
   }
 


Mime
View raw message