drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject drill git commit: DRILL-1900: Fix numeric overflow problem in hbase stat calculation.
Date Mon, 22 Dec 2014 23:15:37 GMT
Repository: drill
Updated Branches:
  refs/heads/master df5695477 -> 5f70ba1cd


DRILL-1900: Fix numeric overflow problem in hbase stat calculation.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/5f70ba1c
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/5f70ba1c
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/5f70ba1c

Branch: refs/heads/master
Commit: 5f70ba1cd17604d2ccb232ae9715629197389c41
Parents: df56954
Author: Jinfeng Ni <jni@maprtech.com>
Authored: Thu Dec 18 18:26:23 2014 -0800
Committer: Jinfeng Ni <jni@maprtech.com>
Committed: Mon Dec 22 11:01:12 2014 -0800

----------------------------------------------------------------------
 .../org/apache/drill/exec/store/hbase/HBaseGroupScan.java   | 2 +-
 .../apache/drill/exec/store/hbase/TableStatsCalculator.java | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/5f70ba1c/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
index 420fe77..6d18d12 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseGroupScan.java
@@ -352,7 +352,7 @@ public class HBaseGroupScan extends AbstractGroupScan implements DrillHBaseConst
 
   @Override
   public ScanStats getScanStats() {
-    int rowCount =  (int) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
+    long rowCount = (long) ((scanSizeInBytes / statsCalculator.getAvgRowSizeInBytes()) * (hbaseScanSpec.getFilter() != null ? 0.5 : 1));
     // the following calculation is not precise since 'columns' could specify CFs while getColsPerRow() returns the number of qualifier.
     float diskCost = scanSizeInBytes * ((columns == null || columns.isEmpty()) ? 1 : columns.size()/statsCalculator.getColsPerRow());
     return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, diskCost);

http://git-wip-us.apache.org/repos/asf/drill/blob/5f70ba1c/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
index 473deeb..9c8fbad 100644
--- a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
+++ b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/TableStatsCalculator.java
@@ -44,6 +44,8 @@ import org.apache.hadoop.hbase.util.Bytes;
 public class TableStatsCalculator {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TableStatsCalculator.class);
 
+  public static final long DEFAULT_ROW_COUNT = 1024L * 1024L;
+
   private static final String DRILL_EXEC_HBASE_SCAN_SAMPLE_ROWS_COUNT = "drill.exec.hbase.scan.samplerows.count";
 
   private static final int DEFAULT_SAMPLE_SIZE = 100;
@@ -74,7 +76,8 @@ public class TableStatsCalculator {
         scan.setCaching(rowsToSample < DEFAULT_SAMPLE_SIZE ? rowsToSample : DEFAULT_SAMPLE_SIZE);
         scan.setMaxVersions(1);
         ResultScanner scanner = table.getScanner(scan);
-        int rowSizeSum = 0, numColumnsSum = 0, rowCount = 0;
+        long rowSizeSum = 0;
+        int numColumnsSum = 0, rowCount = 0;
         for (; rowCount < rowsToSample; ++rowCount) {
           Result row = scanner.next();
           if (row == null) {
@@ -84,7 +87,7 @@ public class TableStatsCalculator {
           rowSizeSum += row.getBytes().getLength();
         }
         if (rowCount > 0) {
-          avgRowSizeInBytes = rowSizeSum/rowCount;
+          avgRowSizeInBytes = (int) (rowSizeSum/rowCount);
           colsPerRow = numColumnsSum/rowCount;
         }
         scanner.close();
@@ -155,7 +158,7 @@ public class TableStatsCalculator {
    */
   public long getRegionSizeInBytes(byte[] regionId) {
     if (sizeMap == null) {
-      return avgRowSizeInBytes*1024*1024; // 1 million rows
+      return (long) avgRowSizeInBytes * DEFAULT_ROW_COUNT; // 1 million rows
     } else {
       Long size = sizeMap.get(regionId);
       if (size == null) {


Mime
View raw message