carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chenliang...@apache.org
Subject [1/2] incubator-carbondata git commit: put if clause out of the for clause and add test case
Date Sun, 12 Mar 2017 12:46:24 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master cee2a0d00 -> cb2c2a3a2


put if clause out of the for clause and add test case

remove blank row

remove blank row

add test comment

add  test comment

add comments for test case make it is easy to read

add comments for baseFilteredValue


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/399e9687
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/399e9687
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/399e9687

Branch: refs/heads/master
Commit: 399e9687eee3544141a44d49374b2b2fb3219710
Parents: cee2a0d
Author: mayun <simafengyun1984@163.com>
Authored: Sun Mar 12 10:51:36 2017 +0800
Committer: chenliang613 <chenliang613@huawei.com>
Committed: Sun Mar 12 20:45:12 2017 +0800

----------------------------------------------------------------------
 .../executer/IncludeFilterExecuterImpl.java     |  13 +-
 .../executer/IncludeFilterExecuterImplTest.java | 219 +++++++++++--------
 2 files changed, 131 insertions(+), 101 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/399e9687/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
index 732e87c..7b8f084 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
@@ -138,23 +138,20 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
     BitSet bitSet = new BitSet(numerOfRows);
     if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
       byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
-      for (int i = 0; i < numerOfRows; i++) {
-
-        if (filterValues.length > 1) {
+      if (filterValues.length > 1) {
+        for (int i = 0; i < numerOfRows; i++) {
           int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
               dimensionColumnDataChunk.getChunkData(i));
-
           if (index >= 0) {
             bitSet.set(i);
           }
-        } else if (filterValues.length == 1) {
+        }
+      } else if (filterValues.length == 1) {
+        for (int i = 0; i < numerOfRows; i++) {
           if (dimensionColumnDataChunk.compareTo(i, filterValues[0]) == 0) {
             bitSet.set(i);
           }
-        } else {
-          break;
         }
-
       }
     }
     return bitSet;

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/399e9687/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
index b2aa855..87b9c2d 100644
--- a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java
@@ -41,23 +41,20 @@ public class IncludeFilterExecuterImplTest extends TestCase {
     BitSet bitSet = new BitSet(numerOfRows);
     if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
       // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
-      for (int i = 0; i < numerOfRows; i++) {
-
-        if (filterValues.length > 1) {
+      if (filterValues.length > 1) {
+        for (int i = 0; i < numerOfRows; i++) {
           int index = CarbonUtil.binarySearch(filterValues, 0, filterValues.length - 1,
               dimensionColumnDataChunk.getChunkData(i));
-
           if (index >= 0) {
             bitSet.set(i);
           }
-        } else if (filterValues.length == 1) {
+        }
+      } else if (filterValues.length == 1) {
+        for (int i = 0; i < numerOfRows; i++) {
           if (dimensionColumnDataChunk.compareTo(i, filterValues[0]) == 0) {
             bitSet.set(i);
           }
-        } else {
-          break;
         }
-
       }
     }
     return bitSet;
@@ -80,107 +77,137 @@ public class IncludeFilterExecuterImplTest extends TestCase {
   }
 
   /**
-   * short int to byte
+   * change int to byte[]
    * 
-   * @param s
-   *          short int
+   * @param value
+   * @param size
    * @return byte[]
    */
-  private byte[] unsignedShortToByte2(int s) {
-    byte[] targets = new byte[2];
-    targets[0] = (byte) (s >> 8 & 0xFF);
-    targets[1] = (byte) (s & 0xFF);
+  private byte[] transferIntToByteArr(int value, int size) {
+    byte[] targets = new byte[size];
+    for (int i = 0; i < size; i++) {
+      int data = value;
+      for (int j = i; j < size - 1; j++) {
+        data = data >> 8;
+      }
+      data = data & 0xFF;
+      targets[i] = (byte) (data & 0xFF);
+    }
     return targets;
   }
 
   @Test
   public void testPerformance() {
 
+    // dimension's data number in a blocklet, usually default is 120000
+    int dataChunkSize = 120000; 
+    //  repeat query times in the test
+    int queryTimes = 5;    
+    // repeated times for a dictionary value
+    int repeatTimes = 200;
+    //filtered value count in a blocklet
+    int filteredValueCnt = 800;
+    comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
+    
+    filteredValueCnt = 100;
+    // set big repeat value for test dimension dictionary size is 1
+    repeatTimes = 2000;
+    comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
+
+  }
+  
+  /**
+   * Tests the filterKeys.length = 0  and filterKeys.length = 1
+   */
+  @Test
+  public void testBoundary() {
+
+	// dimension's data number in a blocklet, usually default is 120000
+    int dataChunkSize = 120000; 
+    //  repeat query times in the test
+    int queryTimes = 5;    
+    // repeated times for a dictionary value
+    int repeatTimes = 20;
+    //filtered value count in a blocklet
+    int filteredValueCnt = 1;
+    comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
+    
+    filteredValueCnt = 0;
+    comparePerformance(dataChunkSize, filteredValueCnt, queryTimes, repeatTimes);
+
+  }
+
+  /**
+   * comapre result and performance
+   * 
+   * @param dataChunkSize dataChunk's stored data size
+   * @param filteredValueCnt filtered dictionary value count
+   * @param queryTimes repeat query times in the test
+   * @param repeatTimes repeated times for a dictionary value
+   * @return 
+   */
+  private void comparePerformance(int dataChunkSize, int filteredValueCnt,
+      int queryTimes, int repeatTimes) {
+    long start;
     long oldTime = 0;
     long newTime = 0;
-    long start;
-    int dataCnt = 120000;
-    int filterCnt = 800;
-    int queryCnt = 5;
-    int repeatCnt = 20;
-    byte[] keyWord = new byte[2];
+    
+    // used to generate filter value
+    int baseFilteredValue = 100;
+    // column dictionary size
+    int dimColumnSize = 2;
+    if ((dataChunkSize / repeatTimes) <= 255 && (baseFilteredValue+filteredValueCnt)
<= 255) {
+      dimColumnSize = 1;
+    }
+    System.out.println("dimColumnSize: " + dimColumnSize);
+    
     FixedLengthDimensionDataChunk dimensionColumnDataChunk;
     DimColumnExecuterFilterInfo dim = new DimColumnExecuterFilterInfo();
 
-    byte[] dataChunk = new byte[dataCnt * keyWord.length];
-    for (int i = 0; i < dataCnt; i++) {
-
-      if (i % repeatCnt == 0) {
-        repeatCnt++;
+    byte[] dataChunk = new byte[dataChunkSize * dimColumnSize];
+    for (int i = 0; i < dataChunkSize; i++) {
+      if (i % repeatTimes == 0) {
+        repeatTimes++;
+      }
+      byte[] data = transferIntToByteArr(repeatTimes, dimColumnSize);      
+      for(int j =0 ; j< dimColumnSize;j++){      
+        dataChunk[dimColumnSize * i + j] = data[j];
       }
-
-      byte[] data = unsignedShortToByte2(repeatCnt);
-      dataChunk[2 * i] = data[0];
-      dataChunk[2 * i + 1] = data[1];
-
     }
 
-    byte[][] filterKeys = new byte[filterCnt][2];
-    for (int ii = 0; ii < filterCnt; ii++) {
-      filterKeys[ii] = unsignedShortToByte2(100 + ii);
+    byte[][] filterKeys = new byte[filteredValueCnt][2];
+    for (int k = 0; k < filteredValueCnt; k++) {
+      filterKeys[k] = transferIntToByteArr(baseFilteredValue + k, dimColumnSize);
     }
     dim.setFilterKeys(filterKeys);
 
     dimensionColumnDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
-        dataChunk.length / keyWord.length, keyWord.length);
+        dataChunkSize, dimColumnSize);
 
-    for (int j = 0; j < queryCnt; j++) {
+    // repeat query and compare 2 result between old code and new optimized code
+    for (int j = 0; j < queryTimes; j++) {
 
       start = System.currentTimeMillis();
-      BitSet bitOld = this.setFilterdIndexToBitSet(dimensionColumnDataChunk, dataCnt, filterKeys);
+      BitSet bitOld = this.setFilterdIndexToBitSet(dimensionColumnDataChunk, dataChunkSize,
filterKeys);
       oldTime = oldTime + System.currentTimeMillis() - start;
 
       start = System.currentTimeMillis();
-      BitSet bitNew = this.setFilterdIndexToBitSetNew((FixedLengthDimensionDataChunk) dimensionColumnDataChunk,
dataCnt,
+      BitSet bitNew = this.setFilterdIndexToBitSetNew((FixedLengthDimensionDataChunk) dimensionColumnDataChunk,
dataChunkSize,
           filterKeys);
       newTime = newTime + System.currentTimeMillis() - start;
 
       assertTrue(bitOld.equals(bitNew));
-
     }
 
-    assertTrue(newTime < oldTime);
+    if (filteredValueCnt >= 100) {
+      assertTrue(newTime < oldTime);
+    }
 
-    System.out.println("old code performance time: " + oldTime);
-    System.out.println("new code performance time: " + newTime);
+    System.out.println("old code performance time: " + oldTime + " ms");
+    System.out.println("new code performance time: " + newTime + " ms");
 
   }
 
-  private BitSet setFilterdIndexToBitSetWithColumnIndex(FixedLengthDimensionDataChunk dimensionColumnDataChunk,
-      int numerOfRows, byte[][] filterValues) {
-    BitSet bitSet = new BitSet(numerOfRows);
-    int start = 0;
-    int last = 0;
-    int startIndex = 0;
-    // byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys();
-    for (int i = 0; i < filterValues.length; i++) {
-      start = CarbonUtil.getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex,
numerOfRows - 1,
-          filterValues[i], false);
-      if (start < 0) {
-        continue;
-      }
-      bitSet.set(start);
-      last = start;
-      for (int j = start + 1; j < numerOfRows; j++) {
-        if (dimensionColumnDataChunk.compareTo(j, filterValues[i]) == 0) {
-          bitSet.set(j);
-          last++;
-        } else {
-          break;
-        }
-      }
-      startIndex = last;
-      if (startIndex >= numerOfRows) {
-        break;
-      }
-    }
-    return bitSet;
-  }
 
   private BitSet setFilterdIndexToBitSetWithColumnIndexOld(FixedLengthDimensionDataChunk
dimensionColumnDataChunk,
       int numerOfRows, byte[][] filterValues) {
@@ -240,50 +267,56 @@ public class IncludeFilterExecuterImplTest extends TestCase {
     long newTime = 0;
     long start;
     long end;
-    int dataCnt = 120000;
-    int filterCnt = 800;
-    int queryCnt = 10000;
-    int repeatCnt = 200;
-    byte[] keyWord = new byte[2];
+    
+    // dimension's data number in a blocklet, usually default is 120000
+    int dataChunkSize = 120000; 
+    //  repeat query times in the test
+    int queryTimes = 10000;    
+    // repeated times for a dictionary value
+    int repeatTimes = 200;
+    //filtered value count in a blocklet
+    int filteredValueCnt = 800;
+    // column dictionary size
+    int dimColumnSize = 2;
     FixedLengthDimensionDataChunk dimensionColumnDataChunk;
     DimColumnExecuterFilterInfo dim = new DimColumnExecuterFilterInfo();
 
-    byte[] dataChunk = new byte[dataCnt * keyWord.length];
-    for (int i = 0; i < dataCnt; i++) {
+    byte[] dataChunk = new byte[dataChunkSize * dimColumnSize];
+    for (int i = 0; i < dataChunkSize; i++) {
 
-      if (i % repeatCnt == 0) {
-        repeatCnt++;
+      if (i % repeatTimes == 0) {
+        repeatTimes++;
       }
 
-      byte[] data = unsignedShortToByte2(repeatCnt);
+      byte[] data = transferIntToByteArr(repeatTimes, dimColumnSize);
       dataChunk[2 * i] = data[0];
       dataChunk[2 * i + 1] = data[1];
 
     }
 
-    byte[][] filterKeys = new byte[filterCnt][2];
-    for (int ii = 0; ii < filterCnt; ii++) {
-      filterKeys[ii] = unsignedShortToByte2(100 + ii);
+    byte[][] filterKeys = new byte[filteredValueCnt][2];
+    for (int k = 0; k < filteredValueCnt; k++) {
+      filterKeys[k] = transferIntToByteArr(100 + k, dimColumnSize);
     }
     dim.setFilterKeys(filterKeys);
 
     dimensionColumnDataChunk = new FixedLengthDimensionDataChunk(dataChunk, null, null,
-        dataChunk.length / keyWord.length, keyWord.length);
+        dataChunk.length / dimColumnSize, dimColumnSize);
 
     // initial to run
-    BitSet bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk,
dataCnt, filterKeys);
-    BitSet bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk,
dataCnt, filterKeys);
+    BitSet bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk,
dataChunkSize, filterKeys);
+    BitSet bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk,
dataChunkSize, filterKeys);
 
     // performance run
-    for (int j = 0; j < queryCnt; j++) {
+    for (int j = 0; j < queryTimes; j++) {
 
       start = System.currentTimeMillis();
-      bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk, dataCnt,
filterKeys);
+      bitOld = this.setFilterdIndexToBitSetWithColumnIndexOld(dimensionColumnDataChunk, dataChunkSize,
filterKeys);
       end = System.currentTimeMillis();
       oldTime = oldTime + end - start;
 
       start = System.currentTimeMillis();
-      bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk, dataCnt,
filterKeys);
+      bitNew = this.setFilterdIndexToBitSetWithColumnIndexNew(dimensionColumnDataChunk, dataChunkSize,
filterKeys);
       end = System.currentTimeMillis();
       newTime = newTime + end - start;
 
@@ -291,8 +324,8 @@ public class IncludeFilterExecuterImplTest extends TestCase {
 
     }
 
-    System.out.println("old code performance time: " + oldTime);
-    System.out.println("new code performance time: " + newTime);
+    System.out.println("old code performance time: " + oldTime + " ms");
+    System.out.println("new code performance time: " + newTime + " ms");
 
   }
 


Mime
View raw message