carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sounakr <...@git.apache.org>
Subject [GitHub] carbondata pull request #1079: [WIP]Measure Filter implementation
Date Mon, 03 Jul 2017 01:44:33 GMT
Github user sounakr commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1079#discussion_r125199254
  
    --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
---
    @@ -18,56 +18,152 @@
     
     import java.io.IOException;
     import java.util.BitSet;
    +import java.util.Comparator;
     
     import org.apache.carbondata.core.datastore.block.SegmentProperties;
     import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
    +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk;
     import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
    +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
    +import org.apache.carbondata.core.metadata.datatype.DataType;
     import org.apache.carbondata.core.scan.filter.FilterUtil;
    +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil;
     import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
    +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
     import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
     import org.apache.carbondata.core.util.BitSetGroup;
     import org.apache.carbondata.core.util.CarbonUtil;
    +import org.apache.carbondata.core.util.DataTypeUtil;
     
     public class ExcludeFilterExecuterImpl implements FilterExecuter {
     
       protected DimColumnResolvedFilterInfo dimColEvaluatorInfo;
       protected DimColumnExecuterFilterInfo dimColumnExecuterInfo;
    +  protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo;
    +  protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo;
       protected SegmentProperties segmentProperties;
    +  protected boolean isDimensionPresentInCurrentBlock = false;
    +  protected boolean isMeasurePresentInCurrentBlock = false;
       /**
        * is dimension column data is natural sorted
        */
    -  private boolean isNaturalSorted;
    +  private boolean isNaturalSorted = false;
    +
       public ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo,
    -      SegmentProperties segmentProperties) {
    -    this.dimColEvaluatorInfo = dimColEvaluatorInfo;
    -    dimColumnExecuterInfo = new DimColumnExecuterFilterInfo();
    +      MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties,
    +      boolean isMeasure) {
         this.segmentProperties = segmentProperties;
    -    FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties,
    -        dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo);
    -    isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex() &&
dimColEvaluatorInfo
    -        .getDimension().isSortColumn();
    +    if (isMeasure == false) {
    +      this.dimColEvaluatorInfo = dimColEvaluatorInfo;
    +      dimColumnExecuterInfo = new DimColumnExecuterFilterInfo();
    +
    +      FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties,
    +          dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo, null, null);
    +      isDimensionPresentInCurrentBlock = true;
    +      isNaturalSorted =
    +          dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo
    +              .getDimension().isSortColumn();
    +    } else {
    +      this.msrColumnEvaluatorInfo = msrColumnEvaluatorInfo;
    +      msrColumnExecutorInfo = new MeasureColumnExecuterFilterInfo();
    +      FilterUtil
    +          .prepareKeysFromSurrogates(msrColumnEvaluatorInfo.getFilterValues(), segmentProperties,
    +              null, null, msrColumnEvaluatorInfo.getMeasure(), msrColumnExecutorInfo);
    +      isMeasurePresentInCurrentBlock = true;
    +    }
    +
       }
     
       @Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws
IOException {
    -    int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping()
    -        .get(dimColEvaluatorInfo.getColumnIndex());
    -    if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
    -      blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock()
    -          .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
    +    if (isDimensionPresentInCurrentBlock == true) {
    +      int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping()
    +          .get(dimColEvaluatorInfo.getColumnIndex());
    +      if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
    +        blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock()
    +            .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
    +      }
    +      DimensionRawColumnChunk dimensionRawColumnChunk =
    +          blockChunkHolder.getDimensionRawDataChunk()[blockIndex];
    +      DimensionColumnDataChunk[] dimensionColumnDataChunks =
    +          dimensionRawColumnChunk.convertToDimColDataChunks();
    +      BitSetGroup bitSetGroup = new BitSetGroup(dimensionRawColumnChunk.getPagesCount());
    +      for (int i = 0; i < dimensionColumnDataChunks.length; i++) {
    +        BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i],
    +            dimensionRawColumnChunk.getRowCount()[i]);
    +        bitSetGroup.setBitSet(bitSet, i);
    +      }
    +
    +      return bitSetGroup;
    +    } else if (isMeasurePresentInCurrentBlock == true) {
    +      int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping()
    +          .get(msrColumnEvaluatorInfo.getColumnIndex());
    +      if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) {
    +        blockChunkHolder.getMeasureRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock()
    +            .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex);
    +      }
    +      MeasureRawColumnChunk measureRawColumnChunk =
    +          blockChunkHolder.getMeasureRawDataChunk()[blockIndex];
    +      MeasureColumnDataChunk[] measureColumnDataChunks =
    +          measureRawColumnChunk.convertToMeasureColDataChunks();
    +      BitSetGroup bitSetGroup = new BitSetGroup(measureRawColumnChunk.getPagesCount());
    +      DataType msrType = getMeasureDataType(msrColumnEvaluatorInfo);
    +      for (int i = 0; i < measureColumnDataChunks.length; i++) {
    +        BitSet bitSet =
    +            getFilteredIndexes(measureColumnDataChunks[i], measureRawColumnChunk.getRowCount()[i],
    +                msrType);
    +        bitSetGroup.setBitSet(bitSet, i);
    +      }
    +      return bitSetGroup;
         }
    -    DimensionRawColumnChunk dimensionRawColumnChunk =
    -        blockChunkHolder.getDimensionRawDataChunk()[blockIndex];
    -    DimensionColumnDataChunk[] dimensionColumnDataChunks =
    -        dimensionRawColumnChunk.convertToDimColDataChunks();
    -    BitSetGroup bitSetGroup =
    -        new BitSetGroup(dimensionRawColumnChunk.getPagesCount());
    -    for (int i = 0; i < dimensionColumnDataChunks.length; i++) {
    -      BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i],
    -          dimensionRawColumnChunk.getRowCount()[i]);
    -      bitSetGroup.setBitSet(bitSet, i);
    +    return null;
    +  }
    +
    +  private DataType getMeasureDataType(MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo)
{
    +    switch (msrColumnEvaluatorInfo.getType()) {
    +      case SHORT:
    +        return DataType.SHORT;
    +      case INT:
    +        return DataType.INT;
    +      case LONG:
    +        return DataType.LONG;
    +      case DECIMAL:
    +        return DataType.DECIMAL;
    +      default:
    +        return DataType.DOUBLE;
         }
    +  }
     
    -    return bitSetGroup;
    +  protected BitSet getFilteredIndexes(MeasureColumnDataChunk measureColumnDataChunk,
    +      int numerOfRows, DataType msrType) {
    +    // Here the algorithm is
    +    // Get the measure values from the chunk. compare sequentially with the
    +    // the filter values. The one that matches sets it Bitset.
    +    BitSet bitSet = new BitSet(numerOfRows);
    +    bitSet.flip(0, numerOfRows);
    +    byte[][] filterValues = msrColumnExecutorInfo.getFilterKeys();
    --- End diff --
    
    As of now, filterKeys uses byte arrays; a later optimization will convert it to store
objects.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message