carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kumarvishal09 <...@git.apache.org>
Subject [GitHub] carbondata pull request #952: [CARBONDATA-1094] Wrong results returned by th...
Date Fri, 26 May 2017 09:42:58 GMT
Github user kumarvishal09 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/952#discussion_r118667394
  
    --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
---
    @@ -474,80 +495,142 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnDataChunk
dimensionColumnD
           int numerOfRows) {
         BitSet bitSet = new BitSet(numerOfRows);
         // if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) {
    -    int start = 0;
    -    int startMin = 0;
    -    int endMax = 0;
    -    int startIndex = 0;
         byte[][] filterValues = this.filterRangesValues;
    -    // For Range expression we expect two values. The First is the Min Value and Second
is the
    -    // Max value.
    -    if (startBlockMinIsDefaultStart == false) {
    -
    -      start = CarbonUtil
    -          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk,
    -              startIndex, numerOfRows - 1, filterValues[0], greaterThanExp);
    +    if (dimensionColumnDataChunk.isExplicitSorted()) {
    +      int start = 0;
    +      int startMin = 0;
    +      int endMax = 0;
    +      int startIndex = 0;
    +      // For Range expression we expect two values. The First is the Min Value and Second
is the
    +      // Max value.
    +      if (startBlockMinIsDefaultStart == false) {
     
    -      if (greaterThanExp == true && start >= 0) {
             start = CarbonUtil
    -            .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[0],
    -                numerOfRows);
    -      }
    +            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows
- 1,
    +                filterValues[0], greaterThanExp);
     
    -      if (start < 0) {
    -        start = -(start + 1);
    -        if (start == numerOfRows) {
    -          start = start - 1;
    +        if (greaterThanExp == true && start >= 0) {
    +          start = CarbonUtil
    +              .nextGreaterValueToTarget(start, dimensionColumnDataChunk, filterValues[0],
    +                  numerOfRows);
             }
    -        // Method will compare the tentative index value after binary search, this tentative
    -        // index needs to be compared by the filter member if its >= filter then from
that
    -        // index the bitset will be considered for filtering process.
    -        if ((ByteUtil.compare(filterValues[0], dimensionColumnDataChunk.getChunkData(start)))
    -            > 0) {
    -          start = start + 1;
    +
    +        if (start < 0) {
    +          start = -(start + 1);
    +          if (start == numerOfRows) {
    +            start = start - 1;
    +          }
    +          // Method will compare the tentative index value after binary search, this
tentative
    +          // index needs to be compared by the filter member if its >= filter then
from that
    +          // index the bitset will be considered for filtering process.
    +          if ((ByteUtil.compare(filterValues[0], dimensionColumnDataChunk.getChunkData(start)))
    +              > 0) {
    +            start = start + 1;
    +          }
             }
    +        startMin = start;
    +      } else {
    +        startMin = startIndex;
           }
    -      startMin = start;
    -    } else {
    -      startMin = startIndex;
    -    }
    -
    -    if (endBlockMaxisDefaultEnd == false) {
    -      start = CarbonUtil
    -          .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows
- 1,
    -              filterValues[1], lessThanEqualExp);
     
    -      if (lessThanExp == true && start >= 0) {
    -        start =
    -            CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[1]);
    -      }
    +      if (endBlockMaxisDefaultEnd == false) {
    +        start = CarbonUtil
    +            .getFirstIndexUsingBinarySearch(dimensionColumnDataChunk, startIndex, numerOfRows
- 1,
    +                filterValues[1], lessThanEqualExp);
     
    -      if (start < 0) {
    -        start = -(start + 1);
    -        if (start == numerOfRows) {
    -          start = start - 1;
    +        if (lessThanExp == true && start >= 0) {
    +          start =
    +              CarbonUtil.nextLesserValueToTarget(start, dimensionColumnDataChunk, filterValues[1]);
             }
    -        // In case the start is less than 0, then positive value of start is pointing
to the next
    -        // value of the searched key. So move to the previous one.
    -        if ((ByteUtil.compare(filterValues[1], dimensionColumnDataChunk.getChunkData(start))
    -            < 0)) {
    -          start = start - 1;
    +
    +        if (start < 0) {
    +          start = -(start + 1);
    +          if (start == numerOfRows) {
    +            start = start - 1;
    +          }
    +          // In case the start is less than 0, then positive value of start is pointing
to the next
    +          // value of the searched key. So move to the previous one.
    +          if ((ByteUtil.compare(filterValues[1], dimensionColumnDataChunk.getChunkData(start))
    +              < 0)) {
    +            start = start - 1;
    +          }
             }
    +        endMax = start;
    +      } else {
    +        endMax = numerOfRows - 1;
    +      }
    +
    +      for (int j = startMin; j <= endMax; j++) {
    +        bitSet.set(j);
    +      }
    +
    +      // Binary Search cannot be done on '@NU#LL$!", so need to check and compare for
null on
    +      // matching row.
    +      if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
    +        updateForNoDictionaryColumn(startMin, endMax, dimensionColumnDataChunk, bitSet);
           }
    -      endMax = start;
         } else {
    -      endMax = numerOfRows - 1;
    +      // evaluate result for lower range value first and then perform and operation in
the
    +      // upper range value in order to compute the final result
    +      bitSet = evaluateGreaterThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[0],
    +          numerOfRows);
    +      bitSet.and(evaluateLessThanFilterForUnsortedColumn(dimensionColumnDataChunk, filterValues[1],
    +          numerOfRows));
         }
    +    return bitSet;
    +  }
     
    -    for (int j = startMin; j <= endMax; j++) {
    -      bitSet.set(j);
    +  /**
    +   * This method will evaluate the result for filter column based on the lower range
value
    +   *
    +   * @param dimensionColumnDataChunk
    +   * @param filterValue
    +   * @param numberOfRows
    +   * @return
    +   */
    +  private BitSet evaluateGreaterThanFilterForUnsortedColumn(
    +      DimensionColumnDataChunk dimensionColumnDataChunk, byte[] filterValue, int numberOfRows)
{
    +    BitSet bitSet = new BitSet(numberOfRows);
    +    if (greaterThanExp) {
    +      for (int i = 0; i < numberOfRows; i++) {
    +        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue)
> 0)) {
    +          bitSet.set(i);
    +        }
    +      }
    +    } else if (greaterThanEqualExp) {
    +      for (int i = 0; i < numberOfRows; i++) {
    +        if ((ByteUtil.compare(dimensionColumnDataChunk.getChunkData(i), filterValue)
>= 0)) {
    +          bitSet.set(i);
    +        }
    +      }
         }
    +    return bitSet;
    +  }
     
    -    // Binary Search cannot be done on '@NU#LL$!", so need to check and compare for null
on
    -    // matching row.
    -    if (dimensionColumnDataChunk.isNoDicitionaryColumn()) {
    -      updateForNoDictionaryColumn(startMin, endMax, dimensionColumnDataChunk, bitSet);
    +  /**
    +   * This method will evaluate the result for filter column based on the upper range
value
    +   *
    +   * @param dimensionColumnDataChunk
    +   * @param filterValue
    +   * @param numberOfRows
    +   * @return
    +   */
    +  private BitSet evaluateLessThanFilterForUnsortedColumn(
    +      DimensionColumnDataChunk dimensionColumnDataChunk, byte[] filterValue, int numberOfRows)
{
    --- End diff --
    
    Because we store @NU#LL$! for null values in no-dictionary columns, this method must,
after collecting the rows that are less than (or less than or equal to) the filter value, check
whether any null values are present and, if so, remove them from the bitset.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message