hive-commits mailing list archives

From hashut...@apache.org
Subject svn commit: r1525329 - /hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
Date Sun, 22 Sep 2013 08:15:08 GMT
Author: hashutosh
Date: Sun Sep 22 08:15:08 2013
New Revision: 1525329

URL: http://svn.apache.org/r1525329
Log:
HIVE-3420 : Inefficiency in hbase handler when processing a query including a rowkey range scan (Navis via Ashutosh Chauhan)

Modified:
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
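
In brief, the patch retires convertFilter(), which mutated a caller-supplied
Scan (and, on the record-reader path, rebuilt the TableSplit), in favor of
createFilterScan(), which returns a fresh Scan carrying the pushed-down
start/stop rows. A minimal sketch of the resulting Scan setup, assuming only
the HBase client calls this file already uses (Scan.setStartRow/setStopRow);
the class and method names below are illustrative, not part of the patch:

  import org.apache.hadoop.hbase.client.Scan;
  import org.apache.hadoop.hbase.util.Bytes;

  public class RowKeyRangeSketch {
    // Build a Scan restricted to [startInclusive, stopExclusive), the same
    // shape createFilterScan() produces for a pushed-down rowkey predicate
    // such as: key >= 'row100' AND key < 'row200'.
    static Scan rangeScan(byte[] startInclusive, byte[] stopExclusive) {
      Scan scan = new Scan();
      scan.setStartRow(startInclusive); // inclusive lower bound
      scan.setStopRow(stopExclusive);   // exclusive upper bound
      return scan;
    }

    public static void main(String[] args) {
      Scan scan = rangeScan(Bytes.toBytes("row100"), Bytes.toBytes("row200"));
      System.out.println(Bytes.toString(scan.getStartRow())); // row100
      System.out.println(Bytes.toString(scan.getStopRow()));  // row200
    }
  }

Because getSplits() now installs this filter-aware Scan via setScan() before
delegating to the superclass, regions that fall entirely outside the
start/stop row range are pruned while splits are computed (HBASE-1829),
instead of being visited and filtered at read time. Two sketches, here and
after the diff, illustrate the mechanics.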

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java?rev=1525329&r1=1525328&r2=1525329&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java Sun Sep 22 08:15:08 2013
@@ -170,19 +170,6 @@ public class HiveHBaseTableInputFormat e
       scan.setBatch(Integer.valueOf(scanBatch));
     }
 
-    // If Hive's optimizer gave us a filter to process, convert it to the
-    // HBase scan form now.
-    int iKey = -1;
-
-    try {
-      iKey = HBaseSerDe.getRowKeyColumnOffset(columnsMapping);
-    } catch (SerDeException e) {
-      throw new IOException(e);
-    }
-
-    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey,
-      getStorageFormatOfKey(columnsMapping.get(iKey).mappingSpec,
-      jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));
     setScan(scan);
     Job job = new Job(jobConf);
     TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(
@@ -254,26 +241,17 @@ public class HiveHBaseTableInputFormat e
    *
    * @param jobConf configuration for the scan
    *
-   * @param scan the HBase scan object to restrict
-   *
-   * @param tableSplit the HBase table split to restrict, or null
-   * if calculating splits
-   *
    * @param iKey 0-based offset of key column within Hive table
    *
    * @return converted table split if any
    */
-  private TableSplit convertFilter(
-    JobConf jobConf,
-    Scan scan,
-    TableSplit tableSplit,
-    int iKey, boolean isKeyBinary)
-    throws IOException {
+  private Scan createFilterScan(JobConf jobConf, int iKey, boolean isKeyBinary)
+      throws IOException {
 
-    String filterExprSerialized =
-      jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+    Scan scan = new Scan();
+    String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
     if (filterExprSerialized == null) {
-      return tableSplit;
+      return scan;
     }
     ExprNodeDesc filterExpr =
       Utilities.deserializeExpression(filterExprSerialized, jobConf);
@@ -340,16 +318,9 @@ public class HiveHBaseTableInputFormat e
         throw new IOException(comparisonOp + " is not a supported comparison operator");
       }
     }
-    if (tableSplit != null) {
-      tableSplit = new TableSplit(
-        tableSplit.getTableName(),
-        startRow,
-        stopRow,
-        tableSplit.getRegionLocation());
-    }
     scan.setStartRow(startRow);
     scan.setStopRow(stopRow);
-    return tableSplit;
+    return scan;
   }
 
     private byte[] getConstantVal(Object writable, PrimitiveObjectInspector poi,
@@ -393,7 +364,7 @@ public class HiveHBaseTableInputFormat e
 
   private byte[] getNextBA(byte[] current){
     // startRow is inclusive while stopRow is exclusive,
-    //this util method returns very next bytearray which will occur after the current one
+    // this util method returns very next bytearray which will occur after the current one
     // by padding current one with a trailing 0 byte.
     byte[] next = new byte[current.length + 1];
     System.arraycopy(current, 0, next, 0, current.length);
@@ -463,7 +434,16 @@ public class HiveHBaseTableInputFormat e
       throw new IOException(e);
     }
 
-    Scan scan = new Scan();
+    // Take filter pushdown into account while calculating splits; this
+    // allows us to prune off regions immediately.  Note that although
+    // the Javadoc for the superclass getSplits says that it returns one
+    // split per region, the implementation actually takes the scan
+    // definition into account and excludes regions which don't satisfy
+    // the start/stop row conditions (HBASE-1829).
+    Scan scan = createFilterScan(jobConf, iKey,
+        getStorageFormatOfKey(columnsMapping.get(iKey).mappingSpec,
+            jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));
+
 
     // The list of families that have been added to the scan
     List<String> addedFamilies = new ArrayList<String>();
@@ -486,18 +466,8 @@ public class HiveHBaseTableInputFormat e
         }
       }
     }
-
-    // Take filter pushdown into account while calculating splits; this
-    // allows us to prune off regions immediately.  Note that although
-    // the Javadoc for the superclass getSplits says that it returns one
-    // split per region, the implementation actually takes the scan
-    // definition into account and excludes regions which don't satisfy
-    // the start/stop row conditions (HBASE-1829).
-    convertFilter(jobConf, scan, null, iKey,
-      getStorageFormatOfKey(columnsMapping.get(iKey).mappingSpec,
-      jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));
-
     setScan(scan);
+
     Job job = new Job(jobConf);
     JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
     Path [] tablePaths = FileInputFormat.getInputPaths(jobContext);
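
One detail worth noting from the getNextBA() hunk above: since startRow is
inclusive while stopRow is exclusive, a bound that must include row key K is
encoded by using K padded with a single trailing 0x00 byte as the stopRow,
the smallest byte array that sorts after K. A self-contained illustration
(the wrapper class is made up for the example; the method body mirrors the
one in the diff):

  import java.util.Arrays;

  public class NextBASketch {
    // Mirrors getNextBA() from the diff: pad the current row key with a
    // trailing 0 byte to obtain the very next byte array in sort order.
    static byte[] getNextBA(byte[] current) {
      byte[] next = new byte[current.length + 1];
      System.arraycopy(current, 0, next, 0, current.length);
      // next[current.length] is already 0: Java zero-initializes arrays.
      return next;
    }

    public static void main(String[] args) {
      byte[] next = getNextBA(new byte[] {'a', 'b', 'c'});
      System.out.println(Arrays.toString(next)); // [97, 98, 99, 0]
      // Using "abc\0" as the exclusive stopRow lets the scan include
      // every row up to and including "abc".
    }
  }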


