carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject [17/50] carbondata git commit: [CARBONDATA-2779]Fixed filter query issue in case of V1/v2 format store
Date Mon, 30 Jul 2018 18:42:43 GMT
[CARBONDATA-2779]Fixed filter query issue in case of V1/v2 format store

Problem:
Filter query is failing for V1/V2 carbondata store

Root Cause:
In the V1 store, measure min/max values were not added to BlockMinMaxIndex in the executor when a filter is applied,
so min/max pruning fails with an ArrayIndexOutOfBoundsException.

Solution:
Need to add min/max values for measure columns, the same way it is already handled in driver-side block pruning.

This closes #2550


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0a6fe088
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0a6fe088
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0a6fe088

Branch: refs/heads/branch-1.4
Commit: 0a6fe088941e4281f979503347223da048a47828
Parents: 68e203a
Author: kumarvishal09 <kumarvishal1802@gmail.com>
Authored: Tue Jul 24 20:10:54 2018 +0530
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Tue Jul 31 00:11:26 2018 +0530

----------------------------------------------------------------------
 .../indexstore/blockletindex/IndexWrapper.java  |  8 +-
 .../executor/impl/AbstractQueryExecutor.java    | 95 ++++++++++----------
 2 files changed, 50 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/0a6fe088/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
index 1de3122..9588f57 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
@@ -16,7 +16,6 @@
  */
 package org.apache.carbondata.core.indexstore.blockletindex;
 
-import java.io.IOException;
 import java.util.List;
 
 import org.apache.carbondata.core.datastore.block.AbstractIndex;
@@ -34,12 +33,11 @@ public class IndexWrapper extends AbstractIndex {
 
   private List<TableBlockInfo> blockInfos;
 
-  public IndexWrapper(List<TableBlockInfo> blockInfos) throws IOException {
+  public IndexWrapper(List<TableBlockInfo> blockInfos, SegmentProperties segmentProperties)
{
     this.blockInfos = blockInfos;
-    segmentProperties = new SegmentProperties(blockInfos.get(0).getDetailInfo().getColumnSchemas(),
-        blockInfos.get(0).getDetailInfo().getDimLens());
+    this.segmentProperties = segmentProperties;
     dataRefNode = new BlockletDataRefNode(blockInfos, 0,
-        segmentProperties.getDimensionColumnsValueSize());
+        this.segmentProperties.getDimensionColumnsValueSize());
   }
 
   @Override public void buildIndex(List<DataFileFooter> footerList) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/0a6fe088/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index c8c8a0f..5b67921 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -40,13 +40,11 @@ import org.apache.carbondata.core.datastore.IndexKey;
 import org.apache.carbondata.core.datastore.block.AbstractIndex;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
-import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
 import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
 import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNode;
 import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper;
 import org.apache.carbondata.core.keygenerator.KeyGenException;
 import org.apache.carbondata.core.memory.UnsafeMemoryManager;
-import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
 import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
 import org.apache.carbondata.core.metadata.datatype.DataType;
@@ -65,6 +63,7 @@ import org.apache.carbondata.core.scan.model.ProjectionMeasure;
 import org.apache.carbondata.core.scan.model.QueryModel;
 import org.apache.carbondata.core.stats.QueryStatistic;
 import org.apache.carbondata.core.stats.QueryStatisticsConstants;
+import org.apache.carbondata.core.util.BlockletDataMapUtil;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory;
 import org.apache.carbondata.core.util.CarbonUtil;
@@ -128,13 +127,7 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E>
{
     // so block will be loaded in sorted order this will be required for
     // query execution
     Collections.sort(queryModel.getTableBlockInfos());
-
-    List<AbstractIndex> indexList = new ArrayList<>();
-    Map<String, List<TableBlockInfo>> listMap = getFilePathToTableBlockInfoMapping(queryModel);
-    for (List<TableBlockInfo> tableBlockInfos : listMap.values()) {
-      indexList.add(new IndexWrapper(tableBlockInfos));
-    }
-    queryProperties.dataBlocks = indexList;
+    queryProperties.dataBlocks = getDataBlocks(queryModel);
     queryStatistic
         .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis());
     queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
@@ -180,25 +173,27 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E>
{
   }
 
   /**
-   * Method to prepare file path to table block Info mapping
+   * Method returns the block(s) on which query will get executed
    *
    * @param queryModel
    * @return
    * @throws IOException
    */
-  private Map<String, List<TableBlockInfo>> getFilePathToTableBlockInfoMapping(
-      QueryModel queryModel) throws IOException {
+  private List<AbstractIndex> getDataBlocks(QueryModel queryModel) throws IOException
{
     Map<String, List<TableBlockInfo>> listMap = new LinkedHashMap<>();
-    // thsi is introduced to handle the case when CACHE_LEVEL=BLOCK and there are few other
dataMaps
+    // this is introduced to handle the case when CACHE_LEVEL=BLOCK and there are few other
dataMaps
     // like lucene, Bloom created on the table. In that case all the dataMaps will do blocklet
     // level pruning and blockInfo entries will be repeated with different blockletIds
     Map<String, DataFileFooter> filePathToFileFooterMapping = new HashMap<>();
+    Map<String, SegmentProperties> filePathToSegmentPropertiesMap = new HashMap<>();
     for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) {
       List<TableBlockInfo> tableBlockInfos = listMap.get(blockInfo.getFilePath());
       if (tableBlockInfos == null) {
         tableBlockInfos = new ArrayList<>();
         listMap.put(blockInfo.getFilePath(), tableBlockInfos);
       }
+      SegmentProperties segmentProperties =
+          filePathToSegmentPropertiesMap.get(blockInfo.getFilePath());
       BlockletDetailInfo blockletDetailInfo = blockInfo.getDetailInfo();
       // This case can come in 2 scenarios:
       // 1. old stores (1.1 or any prior version to 1.1) where blocklet information is not
@@ -206,30 +201,45 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E>
{
       // 2. CACHE_LEVEL is set to block
       // 3. CACHE_LEVEL is BLOCKLET but filter column min/max is not cached in driver
       if (blockletDetailInfo.getBlockletInfo() == null || blockletDetailInfo
-          .isUseMinMaxForPruning()) {
-        readAndFillBlockletInfo(filePathToFileFooterMapping, tableBlockInfos, blockInfo,
-            blockletDetailInfo);
+            .isUseMinMaxForPruning()) {
+        blockInfo.setBlockOffset(blockletDetailInfo.getBlockFooterOffset());
+        DataFileFooter fileFooter = filePathToFileFooterMapping.get(blockInfo.getFilePath());
+        if (null == fileFooter) {
+          blockInfo.setDetailInfo(null);
+          fileFooter = CarbonUtil.readMetadatFile(blockInfo);
+          filePathToFileFooterMapping.put(blockInfo.getFilePath(), fileFooter);
+          blockInfo.setDetailInfo(blockletDetailInfo);
+        }
+        if (null == segmentProperties) {
+          segmentProperties = new SegmentProperties(fileFooter.getColumnInTable(),
+              blockInfo.getDetailInfo().getDimLens());
+          filePathToSegmentPropertiesMap.put(blockInfo.getFilePath(), segmentProperties);
+        }
+        readAndFillBlockletInfo(tableBlockInfos, blockInfo,
+            blockletDetailInfo, fileFooter, segmentProperties);
       } else {
+        if (null == segmentProperties) {
+          segmentProperties = new SegmentProperties(blockInfo.getDetailInfo().getColumnSchemas(),
+              blockInfo.getDetailInfo().getDimLens());
+          filePathToSegmentPropertiesMap.put(blockInfo.getFilePath(), segmentProperties);
+        }
         tableBlockInfos.add(blockInfo);
       }
     }
-    return listMap;
+    List<AbstractIndex> indexList = new ArrayList<>();
+    for (List<TableBlockInfo> tableBlockInfos : listMap.values()) {
+      indexList.add(new IndexWrapper(tableBlockInfos,
+          filePathToSegmentPropertiesMap.get(tableBlockInfos.get(0).getFilePath())));
+    }
+    return indexList;
   }
 
   /**
    * Read the file footer of block file and get the blocklets to query
    */
-  private void readAndFillBlockletInfo(Map<String, DataFileFooter> filePathToFileFooterMapping,
-      List<TableBlockInfo> tableBlockInfos, TableBlockInfo blockInfo,
-      BlockletDetailInfo blockletDetailInfo) throws IOException {
-    blockInfo.setBlockOffset(blockletDetailInfo.getBlockFooterOffset());
-    DataFileFooter fileFooter = filePathToFileFooterMapping.get(blockInfo.getFilePath());
-    if (null == fileFooter) {
-      blockInfo.setDetailInfo(null);
-      fileFooter = CarbonUtil.readMetadatFile(blockInfo);
-      filePathToFileFooterMapping.put(blockInfo.getFilePath(), fileFooter);
-      blockInfo.setDetailInfo(blockletDetailInfo);
-    }
+  private void readAndFillBlockletInfo(List<TableBlockInfo> tableBlockInfos,
+      TableBlockInfo blockInfo, BlockletDetailInfo blockletDetailInfo, DataFileFooter fileFooter,
+      SegmentProperties segmentProperties) {
     List<BlockletInfo> blockletList = fileFooter.getBlockletList();
     // cases when blockletID will be -1
     // 1. In case of legacy store
@@ -241,12 +251,12 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E>
{
       // fill the info only for given blockletId in detailInfo
       BlockletInfo blockletInfo = blockletList.get(blockletDetailInfo.getBlockletId());
       fillBlockletInfoToTableBlock(tableBlockInfos, blockInfo, blockletDetailInfo, fileFooter,
-          blockletInfo, blockletDetailInfo.getBlockletId());
+          blockletInfo, blockletDetailInfo.getBlockletId(), segmentProperties);
     } else {
       short count = 0;
       for (BlockletInfo blockletInfo : blockletList) {
         fillBlockletInfoToTableBlock(tableBlockInfos, blockInfo, blockletDetailInfo, fileFooter,
-            blockletInfo, count);
+            blockletInfo, count, segmentProperties);
         count++;
       }
     }
@@ -254,7 +264,7 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E>
{
 
   private void fillBlockletInfoToTableBlock(List<TableBlockInfo> tableBlockInfos,
       TableBlockInfo blockInfo, BlockletDetailInfo blockletDetailInfo, DataFileFooter fileFooter,
-      BlockletInfo blockletInfo, short blockletId) {
+      BlockletInfo blockletInfo, short blockletId, SegmentProperties segmentProperties) {
     TableBlockInfo info = blockInfo.copy();
     BlockletDetailInfo detailInfo = info.getDetailInfo();
     // set column schema details
@@ -263,14 +273,14 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E>
{
     byte[][] maxValues = blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues();
     byte[][] minValues = blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues();
     if (blockletDetailInfo.isLegacyStore()) {
+      minValues = BlockletDataMapUtil.updateMinValues(segmentProperties,
+          blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues());
+      maxValues = BlockletDataMapUtil.updateMaxValues(segmentProperties,
+          blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues());
       // update min and max values in case of old store for measures as min and max is written
       // opposite for measures in old store ( store <= 1.1 version)
-      maxValues = CarbonUtil.updateMinMaxValues(fileFooter,
-          blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues(),
-          blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues(), false);
-      minValues = CarbonUtil.updateMinMaxValues(fileFooter,
-          blockletInfo.getBlockletIndex().getMinMaxIndex().getMaxValues(),
-          blockletInfo.getBlockletIndex().getMinMaxIndex().getMinValues(), true);
+      maxValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
+      minValues = CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, true);
       info.setDataBlockFromOldStore(true);
     }
     blockletInfo.getBlockletIndex().getMinMaxIndex().setMaxValues(maxValues);
@@ -281,17 +291,6 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E>
{
     tableBlockInfos.add(info);
   }
 
-  private List<TableBlockUniqueIdentifier> prepareTableBlockUniqueIdentifier(
-      List<TableBlockInfo> tableBlockInfos, AbsoluteTableIdentifier absoluteTableIdentifier)
{
-    List<TableBlockUniqueIdentifier> tableBlockUniqueIdentifiers =
-        new ArrayList<>(tableBlockInfos.size());
-    for (TableBlockInfo blockInfo : tableBlockInfos) {
-      tableBlockUniqueIdentifiers
-          .add(new TableBlockUniqueIdentifier(absoluteTableIdentifier, blockInfo));
-    }
-    return tableBlockUniqueIdentifiers;
-  }
-
   protected List<BlockExecutionInfo> getBlockExecutionInfos(QueryModel queryModel)
       throws IOException, QueryExecutionException {
     initQuery(queryModel);


Mime
View raw message