Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 14086200C39 for ; Thu, 16 Mar 2017 10:29:08 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 1262A160B78; Thu, 16 Mar 2017 09:29:08 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id C4953160B7A for ; Thu, 16 Mar 2017 10:29:05 +0100 (CET) Received: (qmail 53252 invoked by uid 500); 16 Mar 2017 09:29:05 -0000 Mailing-List: contact commits-help@carbondata.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@carbondata.incubator.apache.org Delivered-To: mailing list commits@carbondata.incubator.apache.org Received: (qmail 53234 invoked by uid 99); 16 Mar 2017 09:29:05 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd4-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 16 Mar 2017 09:29:05 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd4-us-west.apache.org (ASF Mail Server at spamd4-us-west.apache.org) with ESMTP id 71688C0B3C for ; Thu, 16 Mar 2017 09:29:04 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd4-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -3.569 X-Spam-Level: X-Spam-Status: No, score=-3.569 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-0.001, SPF_NEUTRAL=0.652] autolearn=disabled Received: from mx1-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd4-us-west.apache.org [10.40.0.11]) (amavisd-new, port 10024) with ESMTP id A-9wZw9GLN47 for ; Thu, 16 Mar 2017 09:28:51 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx1-lw-eu.apache.org (ASF Mail Server at mx1-lw-eu.apache.org) with SMTP id D6D7C5FE6B for ; Thu, 16 Mar 2017 09:28:45 +0000 (UTC) Received: (qmail 52088 invoked by uid 99); 16 Mar 2017 09:28:45 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 16 Mar 2017 09:28:45 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id CC5D8F3202; Thu, 16 Mar 2017 09:28:44 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: gvramana@apache.org To: commits@carbondata.incubator.apache.org Date: Thu, 16 Mar 2017 09:28:52 -0000 Message-Id: <0155c1f4abd84dda843c26cedc17d72c@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [09/14] incubator-carbondata git commit: Handled review comments archived-at: Thu, 16 Mar 2017 09:29:08 -0000 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java index 3b2e2bd..dd0c549 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/DimensionInfo.java @@ -34,6 +34,28 @@ public class DimensionInfo { private Object[] defaultValues; /** + * flag to check whether there exist a dictionary column in the query which + * does not exist in the current block + */ + private boolean isDictionaryColumnAdded; + + /** + * flag to check whether there exist a no dictionary column in the query which + * does not exist in the current block + */ + private boolean isNoDictionaryColumnAdded; + + /** + * count of dictionary column not existing in the current block + */ + private int newDictionaryColumnCount; + + /** + * count of no dictionary columns not existing in the current block + */ + private int newNoDictionaryColumnCount; + + /** * @param dimensionExists * @param defaultValues */ @@ -55,4 +77,36 @@ public class DimensionInfo { public Object[] getDefaultValues() { return defaultValues; } + + public boolean isDictionaryColumnAdded() { + return isDictionaryColumnAdded; + } + + public void setDictionaryColumnAdded(boolean dictionaryColumnAdded) { + isDictionaryColumnAdded = dictionaryColumnAdded; + } + + public boolean isNoDictionaryColumnAdded() { + return isNoDictionaryColumnAdded; + } + + public void setNoDictionaryColumnAdded(boolean noDictionaryColumnAdded) { + isNoDictionaryColumnAdded = noDictionaryColumnAdded; + } + + public int getNewDictionaryColumnCount() { + return newDictionaryColumnCount; + } + + public void setNewDictionaryColumnCount(int newDictionaryColumnCount) { + this.newDictionaryColumnCount = newDictionaryColumnCount; + } + + public int getNewNoDictionaryColumnCount() { + return newNoDictionaryColumnCount; + } + + public void setNewNoDictionaryColumnCount(int newNoDictionaryColumnCount) { + this.newNoDictionaryColumnCount = newNoDictionaryColumnCount; + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java index afb6553..5a98e44 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java @@ -248,25 +248,6 @@ public class QueryUtil { } /** - * This method will return the key ordinal of the query dimension from the current block - * - * @param blockDimensions - * @param queryDimension - * @return - */ - public static int getKeyOrdinalOfDimensionFromCurrentBlock(List blockDimensions, - CarbonDimension queryDimension) { - int keyOrdinalInCurrentDimensionBlock = -1; - for (CarbonDimension blockDimension : blockDimensions) { - if (queryDimension.getColumnId().equals(blockDimension.getColumnId())) { - keyOrdinalInCurrentDimensionBlock = blockDimension.getKeyOrdinal(); - break; - } - } - return keyOrdinalInCurrentDimensionBlock; - } - - /** * Below method will be used to add the children block index * this will be basically for complex dimension which will have children * @@ -438,54 +419,6 @@ public class QueryUtil { } /** - * This method will create the updated list of filter measures present in the current block - * - * @param queryFilterMeasures - * @param currentBlockMeasures - * @return - */ - public static Set getUpdatedFilterMeasures(Set queryFilterMeasures, - List currentBlockMeasures) { - if (!queryFilterMeasures.isEmpty()) { - Set updatedFilterMeasures = new HashSet<>(queryFilterMeasures.size()); - for (CarbonMeasure queryMeasure : queryFilterMeasures) { - CarbonMeasure measureFromCurrentBlock = - CarbonUtil.getMeasureFromCurrentBlock(currentBlockMeasures, queryMeasure.getColumnId()); - if (null != measureFromCurrentBlock) { - updatedFilterMeasures.add(measureFromCurrentBlock); - } - } - return updatedFilterMeasures; - } else { - return queryFilterMeasures; - } - } - - /** - * This method will create the updated list of filter dimensions present in the current block - * - * @param queryFilterDimensions - * @param currentBlockDimensions - * @return - */ - public static Set getUpdatedFilterDimensions( - Set queryFilterDimensions, List currentBlockDimensions) { - if (!queryFilterDimensions.isEmpty()) { - Set updatedFilterDimensions = new HashSet<>(queryFilterDimensions.size()); - for (CarbonDimension queryDimension : queryFilterDimensions) { - CarbonDimension dimensionFromCurrentBlock = - CarbonUtil.getDimensionFromCurrentBlock(currentBlockDimensions, queryDimension); - if (null != dimensionFromCurrentBlock) { - updatedFilterDimensions.add(dimensionFromCurrentBlock); - } - } - return updatedFilterDimensions; - } else { - return queryFilterDimensions; - } - } - - /** * Below method will be used to get mapping whether dimension is present in * order by or not * http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java index ac6d60e..955f1f1 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java @@ -58,13 +58,17 @@ public class RestructureUtil { * @param tableComplexDimension * @return list of query dimension which is present in the table block */ - public static List createDimensionInfoAndGetUpdatedQueryDimension( + public static List createDimensionInfoAndGetCurrentBlockQueryDimension( BlockExecutionInfo blockExecutionInfo, List queryDimensions, List tableBlockDimensions, List tableComplexDimension) { List presentDimension = new ArrayList(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); boolean[] isDimensionExists = new boolean[queryDimensions.size()]; Object[] defaultValues = new Object[queryDimensions.size()]; + // create dimension information instance + DimensionInfo dimensionInfo = new DimensionInfo(isDimensionExists, defaultValues); + int newDictionaryColumnCount = 0; + int newNoDictionaryColumnCount = 0; // selecting only those dimension which is present in the query int dimIndex = 0; for (QueryDimension queryDimension : queryDimensions) { @@ -90,35 +94,43 @@ public class RestructureUtil { break; } } + // if dimension is found then no need to search in the complex dimensions list + if (isDimensionExists[dimIndex]) { + dimIndex++; + continue; + } + for (CarbonDimension tableDimension : tableComplexDimension) { + if (tableDimension.getColumnId().equals(queryDimension.getDimension().getColumnId())) { + QueryDimension currentBlockDimension = new QueryDimension(tableDimension.getColName()); + // TODO: for complex dimension set scale and precision by traversing + // the child dimensions + currentBlockDimension.setDimension(tableDimension); + currentBlockDimension.setQueryOrder(queryDimension.getQueryOrder()); + presentDimension.add(currentBlockDimension); + isDimensionExists[dimIndex] = true; + break; + } + } // add default value only in case query dimension is not found in the current block if (!isDimensionExists[dimIndex]) { defaultValues[dimIndex] = validateAndGetDefaultValue(queryDimension.getDimension()); blockExecutionInfo.setRestructuredBlock(true); + // set the flag to say whether a new dictionary column or no dictionary column + // has been added. This will be useful after restructure for compaction scenarios where + // newly added columns data need to be filled + if (queryDimension.getDimension().hasEncoding(Encoding.DICTIONARY)) { + dimensionInfo.setDictionaryColumnAdded(true); + newDictionaryColumnCount++; + } else { + dimensionInfo.setNoDictionaryColumnAdded(true); + newNoDictionaryColumnCount++; + } } } dimIndex++; } - dimIndex = 0; - for (QueryDimension queryDimension : queryDimensions) { - for (CarbonDimension tableDimension : tableComplexDimension) { - if (tableDimension.getColumnId().equals(queryDimension.getDimension().getColumnId())) { - QueryDimension currentBlockDimension = new QueryDimension(tableDimension.getColName()); - currentBlockDimension.setDimension(tableDimension); - currentBlockDimension.setQueryOrder(queryDimension.getQueryOrder()); - presentDimension.add(currentBlockDimension); - isDimensionExists[dimIndex] = true; - break; - } - } - // add default value only in case query dimension is not found in the current block - if (!isDimensionExists[dimIndex]) { - defaultValues[dimIndex] = - validateAndGetDefaultValue(queryDimension.getDimension()); - } - dimIndex++; - } - DimensionInfo dimensionInfo = - new DimensionInfo(isDimensionExists, defaultValues); + dimensionInfo.setNewDictionaryColumnCount(newDictionaryColumnCount); + dimensionInfo.setNewNoDictionaryColumnCount(newNoDictionaryColumnCount); blockExecutionInfo.setDimensionInfo(dimensionInfo); return presentDimension; } @@ -300,7 +312,7 @@ public class RestructureUtil { * @param currentBlockMeasures current block measures * @return measures present in the block */ - public static List createMeasureInfoAndGetUpdatedQueryMeasures( + public static List createMeasureInfoAndGetCurrentBlockQueryMeasures( BlockExecutionInfo blockExecutionInfo, List queryMeasures, List currentBlockMeasures) { MeasureInfo measureInfo = new MeasureInfo(); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index 627a413..eeb7be7 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -55,7 +55,6 @@ import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; -import org.apache.carbondata.core.scan.executor.util.QueryUtil; import org.apache.carbondata.core.scan.expression.ColumnExpression; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.ExpressionResult; @@ -170,9 +169,8 @@ public final class FilterUtil { DimColumnResolvedFilterInfo dimColResolvedFilterInfo, SegmentProperties segmentProperties) { if (dimColResolvedFilterInfo.getDimension().isColumnar()) { - CarbonDimension dimensionFromCurrentBlock = CarbonUtil - .getDimensionFromCurrentBlock(segmentProperties.getDimensions(), - dimColResolvedFilterInfo.getDimension()); + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColResolvedFilterInfo.getDimension()); if (null != dimensionFromCurrentBlock) { // update dimension and column index according to the dimension position in current block DimColumnResolvedFilterInfo dimColResolvedFilterInfoCopyObject = @@ -200,9 +198,8 @@ public final class FilterUtil { DimColumnResolvedFilterInfo dimColResolvedFilterInfo, SegmentProperties segmentProperties) { if (dimColResolvedFilterInfo.getDimension().isColumnar()) { - CarbonDimension dimensionFromCurrentBlock = CarbonUtil - .getDimensionFromCurrentBlock(segmentProperties.getDimensions(), - dimColResolvedFilterInfo.getDimension()); + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(dimColResolvedFilterInfo.getDimension()); if (null != dimensionFromCurrentBlock) { // update dimension and column index according to the dimension position in current block DimColumnResolvedFilterInfo dimColResolvedFilterInfoCopyObject = @@ -603,53 +600,6 @@ public final class FilterUtil { } /** - * This method will check whether a default value for the non-existing column is present - * in the filter values list - * - * @param dimColumnEvaluatorInfo - * @return - */ - public static boolean isDimensionDefaultValuePresentInFilterValues( - DimColumnResolvedFilterInfo dimColumnEvaluatorInfo) { - boolean isDefaultValuePresentInFilterValues = false; - DimColumnFilterInfo filterValues = dimColumnEvaluatorInfo.getFilterValues(); - CarbonDimension dimension = dimColumnEvaluatorInfo.getDimension(); - byte[] defaultValue = dimension.getDefaultValue(); - if (!dimension.hasEncoding(Encoding.DICTIONARY)) { - // for no dictionary cases - // 3 cases: is NUll, is Not Null and filter on default value of newly added column - if (null == defaultValue) { - // default value for case where user gives is Null condition - defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL - .getBytes(Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET)); - } - List noDictionaryFilterValuesList = filterValues.getNoDictionaryFilterValuesList(); - for (byte[] filterValue : noDictionaryFilterValuesList) { - int compare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterValue); - if (compare == 0) { - isDefaultValuePresentInFilterValues = true; - break; - } - } - } else { - // for dictionary and direct dictionary cases - // 3 cases: is NUll, is Not Null and filter on default value of newly added column - int defaultSurrogateValueToCompare = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; - if (null != defaultValue) { - defaultSurrogateValueToCompare++; - } - List filterList = filterValues.getFilterList(); - for (Integer filterValue : filterList) { - if (defaultSurrogateValueToCompare == filterValue) { - isDefaultValuePresentInFilterValues = true; - break; - } - } - } - return isDefaultValuePresentInFilterValues; - } - - /** * Below method will be used to covert the filter surrogate keys * to mdkey * @@ -906,14 +856,15 @@ public final class FilterUtil { if (isExcludePresent) { continue; } - int keyOrdinalOfDimensionFromCurrentBlock = QueryUtil - .getKeyOrdinalOfDimensionFromCurrentBlock(segmentProperties.getDimensions(), - entry.getKey()); - // if key ordinal is -1 that means this dimension does not exist in the current block. + // search the query dimension in current block dimensions. If the dimension is not found + // that means the key cannot be included in start key formation. // Applicable for restructure scenarios - if (keyOrdinalOfDimensionFromCurrentBlock == -1) { + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(entry.getKey()); + if (null == dimensionFromCurrentBlock) { continue; } + int keyOrdinalOfDimensionFromCurrentBlock = dimensionFromCurrentBlock.getKeyOrdinal(); for (DimColumnFilterInfo info : values) { if (startKey[keyOrdinalOfDimensionFromCurrentBlock] < info.getFilterList().get(0)) { startKey[keyOrdinalOfDimensionFromCurrentBlock] = info.getFilterList().get(0); @@ -970,14 +921,15 @@ public final class FilterUtil { if (isExcludeFilterPresent) { continue; } - int keyOrdinalOfDimensionFromCurrentBlock = QueryUtil - .getKeyOrdinalOfDimensionFromCurrentBlock(segmentProperties.getDimensions(), - entry.getKey()); - // if key ordinal is -1 that means this dimension does not exist in the current block. + // search the query dimension in current block dimensions. If the dimension is not found + // that means the key cannot be included in start key formation. // Applicable for restructure scenarios - if (keyOrdinalOfDimensionFromCurrentBlock == -1) { + CarbonDimension dimensionFromCurrentBlock = + segmentProperties.getDimensionFromCurrentBlock(entry.getKey()); + if (null == dimensionFromCurrentBlock) { continue; } + int keyOrdinalOfDimensionFromCurrentBlock = dimensionFromCurrentBlock.getKeyOrdinal(); for (DimColumnFilterInfo info : values) { if (endKey[keyOrdinalOfDimensionFromCurrentBlock] > info.getFilterList() .get(info.getFilterList().size() - 1)) { @@ -1485,7 +1437,7 @@ public final class FilterUtil { // create and fill bitset for the last page BitSet bitSet = new BitSet(rowCountForLastPage); bitSet.set(0, rowCountForLastPage, defaultValue); - bitSetGroup.setBitSet(bitSet, pageCount); + bitSetGroup.setBitSet(bitSet, pagesTobeFullFilled); return bitSetGroup; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java new file mode 100644 index 0000000..65184fb --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.filter.executer; + +import java.nio.charset.Charset; +import java.util.List; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.metadata.encoder.Encoding; +import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; +import org.apache.carbondata.core.scan.filter.DimColumnFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.util.ByteUtil; + +/** + * Abstract class for restructure + */ +public abstract class RestructureEvaluatorImpl implements FilterExecuter { + + /** + * This method will check whether a default value for the non-existing column is present + * in the filter values list + * + * @param dimColumnEvaluatorInfo + * @return + */ + protected boolean isDimensionDefaultValuePresentInFilterValues( + DimColumnResolvedFilterInfo dimColumnEvaluatorInfo) { + boolean isDefaultValuePresentInFilterValues = false; + DimColumnFilterInfo filterValues = dimColumnEvaluatorInfo.getFilterValues(); + CarbonDimension dimension = dimColumnEvaluatorInfo.getDimension(); + byte[] defaultValue = dimension.getDefaultValue(); + if (!dimension.hasEncoding(Encoding.DICTIONARY)) { + // for no dictionary cases + // 3 cases: is NUll, is Not Null and filter on default value of newly added column + if (null == defaultValue) { + // default value for case where user gives is Null condition + defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL + .getBytes(Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET)); + } + List noDictionaryFilterValuesList = filterValues.getNoDictionaryFilterValuesList(); + for (byte[] filterValue : noDictionaryFilterValuesList) { + int compare = ByteUtil.UnsafeComparer.INSTANCE.compareTo(defaultValue, filterValue); + if (compare == 0) { + isDefaultValuePresentInFilterValues = true; + break; + } + } + } else { + // for dictionary and direct dictionary cases + // 3 cases: is NUll, is Not Null and filter on default value of newly added column + int defaultSurrogateValueToCompare = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY; + if (null != defaultValue) { + defaultSurrogateValueToCompare++; + } + List filterList = filterValues.getFilterList(); + for (Integer filterValue : filterList) { + if (defaultSurrogateValueToCompare == filterValue) { + isDefaultValuePresentInFilterValues = true; + break; + } + } + } + return isDefaultValuePresentInFilterValues; + } +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java index 68f0dfe..2954c40 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureExcludeFilterExecutorImpl.java @@ -25,7 +25,7 @@ import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnRes import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; -public class RestructureExcludeFilterExecutorImpl implements FilterExecuter { +public class RestructureExcludeFilterExecutorImpl extends RestructureEvaluatorImpl { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected SegmentProperties segmentProperties; @@ -41,16 +41,14 @@ public class RestructureExcludeFilterExecutorImpl implements FilterExecuter { this.dimColEvaluatorInfo = dimColEvaluatorInfo; this.segmentProperties = segmentProperties; isDefaultValuePresentInFilterValues = - FilterUtil.isDimensionDefaultValuePresentInFilterValues(dimColEvaluatorInfo); + isDimensionDefaultValuePresentInFilterValues(dimColEvaluatorInfo); } @Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws IOException { int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); - BitSetGroup bitSetGroup = new BitSetGroup(1); - BitSet bitSet = new BitSet(numberOfRows); - bitSet.set(0, numberOfRows, !isDefaultValuePresentInFilterValues); - bitSetGroup.setBitSet(bitSet, 0); - return bitSetGroup; + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, !isDefaultValuePresentInFilterValues); } @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java index c1ba2af..03fe2b4 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureIncludeFilterExecutorImpl.java @@ -25,7 +25,7 @@ import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnRes import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; -public class RestructureIncludeFilterExecutorImpl implements FilterExecuter { +public class RestructureIncludeFilterExecutorImpl extends RestructureEvaluatorImpl { protected DimColumnResolvedFilterInfo dimColumnEvaluatorInfo; protected SegmentProperties segmentProperties; @@ -41,16 +41,14 @@ public class RestructureIncludeFilterExecutorImpl implements FilterExecuter { this.dimColumnEvaluatorInfo = dimColumnEvaluatorInfo; this.segmentProperties = segmentProperties; isDefaultValuePresentInFilterValues = - FilterUtil.isDimensionDefaultValuePresentInFilterValues(dimColumnEvaluatorInfo); + isDimensionDefaultValuePresentInFilterValues(dimColumnEvaluatorInfo); } @Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws IOException { int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); - BitSetGroup bitSetGroup = new BitSetGroup(1); - BitSet bitSet = new BitSet(numberOfRows); - bitSet.set(0, numberOfRows, isDefaultValuePresentInFilterValues); - bitSetGroup.setBitSet(bitSet, 0); - return bitSetGroup; + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, isDefaultValuePresentInFilterValues); } public BitSet isScanRequired(byte[][] blkMaxVal, byte[][] blkMinVal) { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java index 4f28ef3..10902ea 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java @@ -124,9 +124,8 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter { private void initDimensionBlockIndexes() { for (int i = 0; i < dimColEvaluatorInfoList.size(); i++) { // find the dimension in the current block dimensions list - CarbonDimension dimensionFromCurrentBlock = CarbonUtil - .getDimensionFromCurrentBlock(segmentProperties.getDimensions(), - dimColEvaluatorInfoList.get(i).getDimension()); + CarbonDimension dimensionFromCurrentBlock = segmentProperties + .getDimensionFromCurrentBlock(dimColEvaluatorInfoList.get(i).getDimension()); if (null != dimensionFromCurrentBlock) { dimColEvaluatorInfoList.get(i).setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); this.dimensionBlocksIndex[i] = segmentProperties.getDimensionOrdinalToBlockMapping() @@ -143,9 +142,8 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter { private void initMeasureBlockIndexes() { for (int i = 0; i < msrColEvalutorInfoList.size(); i++) { // find the measure in the current block measures list - CarbonMeasure measureFromCurrentBlock = CarbonUtil - .getMeasureFromCurrentBlock(segmentProperties.getMeasures(), - msrColEvalutorInfoList.get(i).getCarbonColumn().getColumnId()); + CarbonMeasure measureFromCurrentBlock = segmentProperties.getMeasureFromCurrentBlock( + msrColEvalutorInfoList.get(i).getCarbonColumn().getColumnId()); if (null != measureFromCurrentBlock) { msrColEvalutorInfoList.get(i).setColumnIndex(measureFromCurrentBlock.getOrdinal()); this.measureBlocksIndex[i] = segmentProperties.getMeasuresOrdinalToBlockMapping() @@ -248,8 +246,7 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter { memberBytes = null; } record[dimColumnEvaluatorInfo.getRowIndex()] = DataTypeUtil - .getDataBasedOnDataType(memberBytes, - dimColumnEvaluatorInfo.getDimension().getDataType()); + .getDataBasedOnDataType(memberBytes, dimColumnEvaluatorInfo.getDimension()); } else { continue; } @@ -463,20 +460,6 @@ public class RowLevelFilterExecuterImpl implements FilterExecuter { return bitSet; } - /** - * This method will set the bitset to true by default for a given number of rows - * - * @param numberOfRows - * @return - */ - protected BitSetGroup getDefaultBitSetGroup(int numberOfRows) { - BitSetGroup bitSetGroup = new BitSetGroup(1); - BitSet bitSet = new BitSet(numberOfRows); - bitSet.set(0, numberOfRows, true); - bitSetGroup.setBitSet(bitSet, 0); - return bitSetGroup; - } - @Override public void readBlocks(BlocksChunkHolder blockChunkHolder) throws IOException { for (int i = 0; i < dimColEvaluatorInfoList.size(); i++) { DimColumnResolvedFilterInfo dimColumnEvaluatorInfo = dimColEvaluatorInfoList.get(i); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java index e748141..97535a6 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java @@ -29,6 +29,7 @@ import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; @@ -52,13 +53,13 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute super(dimColEvaluatorInfoList, msrColEvalutorInfoList, exp, tableIdentifier, segmentProperties, null); this.filterRangeValues = filterRangeValues; - checkIfDefaultValueIsPresentInFilterList(); + ifDefaultValueMatchesFilter(); } /** * This method will check whether default value is present in the given filter values */ - private void checkIfDefaultValueIsPresentInFilterList() { + private void ifDefaultValueMatchesFilter() { if (!this.isDimensionPresentInCurrentBlock[0]) { CarbonDimension dimension = this.dimColEvaluatorInfoList.get(0).getDimension(); byte[] defaultValue = dimension.getDefaultValue(); @@ -110,7 +111,10 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute throws FilterUnsupportedException, IOException { // select all rows if dimension does not exists in the current block if (!isDimensionPresentInCurrentBlock[0]) { - return getDefaultBitSetGroup(blockChunkHolder.getDataBlock().nodeSize()); + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); } if (!dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY)) { return super.applyFilter(blockChunkHolder); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java index f188a67..6eb2a6f 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java @@ -29,6 +29,7 @@ import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; +import org.apache.carbondata.core.scan.filter.FilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; @@ -53,13 +54,13 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte super(dimColEvaluatorInfoList, msrColEvalutorInfoList, exp, tableIdentifier, segmentProperties, null); this.filterRangeValues = filterRangeValues; - checkIfDefaultValueIsPresentInFilterList(); + ifDefaultValueMatchesFilter(); } /** * This method will check whether default value is present in the given filter values */ - private void checkIfDefaultValueIsPresentInFilterList() { + private void ifDefaultValueMatchesFilter() { if (!this.isDimensionPresentInCurrentBlock[0]) { CarbonDimension dimension = this.dimColEvaluatorInfoList.get(0).getDimension(); byte[] defaultValue = dimension.getDefaultValue(); @@ -111,7 +112,10 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte throws FilterUnsupportedException, IOException { // select all rows if dimension does not exists in the current block if (!isDimensionPresentInCurrentBlock[0]) { - return getDefaultBitSetGroup(blockChunkHolder.getDataBlock().nodeSize()); + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); } if (!dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY)) { return super.applyFilter(blockChunkHolder); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java index f455acf..306becf 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java @@ -55,13 +55,13 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter super(dimColEvaluatorInfoList, msrColEvalutorInfoList, exp, tableIdentifier, segmentProperties, null); this.filterRangeValues = filterRangeValues; - checkIfDefaultValueIsPresentInFilterList(); + ifDefaultValueMatchesFilter(); } /** * This method will check whether default value is present in the given filter values */ - private void checkIfDefaultValueIsPresentInFilterList() { + private void ifDefaultValueMatchesFilter() { if (!this.isDimensionPresentInCurrentBlock[0]) { CarbonDimension dimension = this.dimColEvaluatorInfoList.get(0).getDimension(); byte[] defaultValue = dimension.getDefaultValue(); @@ -113,7 +113,10 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter throws FilterUnsupportedException, IOException { // select all rows if dimension does not exists in the current block if (!isDimensionPresentInCurrentBlock[0]) { - return getDefaultBitSetGroup(blockChunkHolder.getDataBlock().nodeSize()); + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); } if (!dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY)) { return super.applyFilter(blockChunkHolder); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java index 69c0538..f2a49d9 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java @@ -55,13 +55,13 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut super(dimColEvaluatorInfoList, msrColEvalutorInfoList, exp, tableIdentifier, segmentProperties, null); this.filterRangeValues = filterRangeValues; - checkIfDefaultValueIsPresentInFilterList(); + ifDefaultValueMatchesFilter(); } /** * This method will check whether default value is present in the given filter values */ - private void checkIfDefaultValueIsPresentInFilterList() { + private void ifDefaultValueMatchesFilter() { if (!this.isDimensionPresentInCurrentBlock[0]) { CarbonDimension dimension = this.dimColEvaluatorInfoList.get(0).getDimension(); byte[] defaultValue = dimension.getDefaultValue(); @@ -113,7 +113,10 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut throws FilterUnsupportedException, IOException { // select all rows if dimension does not exists in the current block if (!isDimensionPresentInCurrentBlock[0]) { - return getDefaultBitSetGroup(blockChunkHolder.getDataBlock().nodeSize()); + int numberOfRows = blockChunkHolder.getDataBlock().nodeSize(); + return FilterUtil + .createBitSetGroupWithDefaultValue(blockChunkHolder.getDataBlock().numberOfPages(), + numberOfRows, true); } if (!dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY)) { return super.applyFilter(blockChunkHolder); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/RowLevelRangeFilterResolverImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/RowLevelRangeFilterResolverImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/RowLevelRangeFilterResolverImpl.java index 8a91499..2aa4543 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/RowLevelRangeFilterResolverImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/RowLevelRangeFilterResolverImpl.java @@ -43,7 +43,6 @@ import org.apache.carbondata.core.scan.filter.intf.FilterExecuterType; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.util.ByteUtil; -import org.apache.carbondata.core.util.CarbonUtil; public class RowLevelRangeFilterResolverImpl extends ConditionalFilterResolverImpl { @@ -80,9 +79,8 @@ public class RowLevelRangeFilterResolverImpl extends ConditionalFilterResolverIm return noDictFilterValuesList.toArray((new byte[noDictFilterValuesList.size()][])); } else if (null != dimColEvaluatorInfoList.get(0).getFilterValues() && dimColEvaluatorInfoList .get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) { - CarbonDimension dimensionFromCurrentBlock = CarbonUtil - .getDimensionFromCurrentBlock(segmentProperties.getDimensions(), - this.dimColEvaluatorInfoList.get(0).getDimension()); + CarbonDimension dimensionFromCurrentBlock = segmentProperties + .getDimensionFromCurrentBlock(this.dimColEvaluatorInfoList.get(0).getDimension()); if (null != dimensionFromCurrentBlock) { return FilterUtil.getKeyArray(this.dimColEvaluatorInfoList.get(0).getFilterValues(), dimensionFromCurrentBlock, segmentProperties); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/NoDictionaryTypeVisitor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/NoDictionaryTypeVisitor.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/NoDictionaryTypeVisitor.java index c51133d..351d2c0 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/NoDictionaryTypeVisitor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/resolver/resolverinfo/visitor/NoDictionaryTypeVisitor.java @@ -20,7 +20,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.scan.expression.ExpressionResult; +import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression; import org.apache.carbondata.core.scan.expression.exception.FilterIllegalMemberException; import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; import org.apache.carbondata.core.scan.filter.DimColumnFilterInfo; @@ -45,16 +45,17 @@ public class NoDictionaryTypeVisitor implements ResolvedFilterInfoVisitorIntf { public void populateFilterResolvedInfo(DimColumnResolvedFilterInfo visitableObj, FilterResolverMetadata metadata) throws FilterUnsupportedException { DimColumnFilterInfo resolvedFilterObject = null; - List evaluateResultListFinal; + List evaluateResultListFinal = null; try { - ExpressionResult result = metadata.getExpression().evaluate(null); - Boolean booleanResult = result.getBoolean(); // handling for is null case scenarios - if (null != booleanResult && booleanResult == metadata.isIncludeFilter()) { - evaluateResultListFinal = new ArrayList<>(1); - evaluateResultListFinal.add(CarbonCommonConstants.MEMBER_DEFAULT_VAL); + if (metadata.getExpression() instanceof EqualToExpression) { + EqualToExpression expression = (EqualToExpression) metadata.getExpression(); + if (expression.isNull) { + evaluateResultListFinal = new ArrayList<>(1); + evaluateResultListFinal.add(CarbonCommonConstants.MEMBER_DEFAULT_VAL); + } } else { - evaluateResultListFinal = result.getListAsString(); + evaluateResultListFinal = metadata.getExpression().evaluate(null).getListAsString(); } // Adding default null member inorder to not display the same while // displaying the report as per hive compatibility. http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java b/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java index 9162ff8..210ee11 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java @@ -172,7 +172,7 @@ public class QueryModel implements Serializable { private static CarbonMeasure getCarbonMetadataMeasure(String name, List measures) { for (CarbonMeasure measure : measures) { - if (!measure.isInvisible() && measure.getColName().equalsIgnoreCase(name)) { + if (measure.getColName().equalsIgnoreCase(name)) { return measure; } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java index 2e701c9..4952e07 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java @@ -23,20 +23,34 @@ public interface CarbonColumnVector { void putShort(int rowId, short value); + void putShorts(int rowId, int count, short value); + void putInt(int rowId, int value); + void putInts(int rowId, int count, int value); + void putLong(int rowId, long value); + void putLongs(int rowId, int count, long value); + void putDecimal(int rowId, Decimal value, int precision); + void putDecimals(int rowId, int count, Decimal value, int precision); + void putDouble(int rowId, double value); + void putDoubles(int rowId, int count, double value); + void putBytes(int rowId, byte[] value); + void putBytes(int rowId, int count, byte[] value); + void putBytes(int rowId, int offset, int length, byte[] value); void putNull(int rowId); + void putNulls(int rowId, int count); + boolean isNull(int rowId); void putObject(int rowId, Object obj); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 4e2d995..c7a6f2d 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -39,9 +39,6 @@ import java.util.Set; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; -import org.apache.carbondata.core.cache.Cache; -import org.apache.carbondata.core.cache.CacheProvider; -import org.apache.carbondata.core.cache.CacheType; import org.apache.carbondata.core.cache.dictionary.Dictionary; import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier; import org.apache.carbondata.core.constants.CarbonCommonConstants; @@ -56,18 +53,13 @@ import org.apache.carbondata.core.datastore.columnar.UnBlockIndexer; import org.apache.carbondata.core.datastore.compression.MeasureMetaDataModel; import org.apache.carbondata.core.datastore.compression.WriterCompressModel; import org.apache.carbondata.core.datastore.filesystem.CarbonFile; -import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.keygenerator.mdkey.NumberCompressor; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; -import org.apache.carbondata.core.metadata.CarbonTableIdentifier; -import org.apache.carbondata.core.metadata.ColumnIdentifier; import org.apache.carbondata.core.metadata.ValueEncoderMeta; import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.encoder.Encoding; -import org.apache.carbondata.core.metadata.schema.table.CarbonTable; -import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; @@ -1068,22 +1060,20 @@ public final class CarbonUtil { List isDictionaryDimensions = new ArrayList(); Set processedColumnGroup = new HashSet(); for (CarbonDimension carbonDimension : tableDimensionList) { - if (!carbonDimension.isInvisible()) { - List childs = carbonDimension.getListOfChildDimensions(); - //assuming complex dimensions will always be atlast - if (null != childs && childs.size() > 0) { - break; - } - if (carbonDimension.isColumnar() && hasEncoding(carbonDimension.getEncoder(), - Encoding.DICTIONARY)) { + List childs = carbonDimension.getListOfChildDimensions(); + //assuming complex dimensions will always be atlast + if (null != childs && childs.size() > 0) { + break; + } + if (carbonDimension.isColumnar() && hasEncoding(carbonDimension.getEncoder(), + Encoding.DICTIONARY)) { + isDictionaryDimensions.add(true); + } else if (!carbonDimension.isColumnar()) { + if (processedColumnGroup.add(carbonDimension.columnGroupId())) { isDictionaryDimensions.add(true); - } else if (!carbonDimension.isColumnar()) { - if (processedColumnGroup.add(carbonDimension.columnGroupId())) { - isDictionaryDimensions.add(true); - } - } else { - isDictionaryDimensions.add(false); } + } else { + isDictionaryDimensions.add(false); } } boolean[] primitive = ArrayUtils @@ -1157,7 +1147,7 @@ public final class CarbonUtil { public static CarbonDimension findDimension(List dimensions, String carbonDim) { CarbonDimension findDim = null; for (CarbonDimension dimension : dimensions) { - if (!dimension.isInvisible() && dimension.getColName().equalsIgnoreCase(carbonDim)) { + if (dimension.getColName().equalsIgnoreCase(carbonDim)) { findDim = dimension; break; } @@ -1243,9 +1233,7 @@ public final class CarbonUtil { List wrapperColumnSchemaList = new ArrayList(); fillCollumnSchemaListForComplexDims(carbonDimensionsList, wrapperColumnSchemaList); for (CarbonMeasure carbonMeasure : carbonMeasureList) { - if (!carbonMeasure.isInvisible()) { - wrapperColumnSchemaList.add(carbonMeasure.getColumnSchema()); - } + wrapperColumnSchemaList.add(carbonMeasure.getColumnSchema()); } return wrapperColumnSchemaList; } @@ -1253,12 +1241,10 @@ public final class CarbonUtil { private static void fillCollumnSchemaListForComplexDims( List carbonDimensionsList, List wrapperColumnSchemaList) { for (CarbonDimension carbonDimension : carbonDimensionsList) { - if (!carbonDimension.isInvisible()) { - wrapperColumnSchemaList.add(carbonDimension.getColumnSchema()); - List childDims = carbonDimension.getListOfChildDimensions(); - if (null != childDims && childDims.size() > 0) { - fillCollumnSchemaListForComplexDims(childDims, wrapperColumnSchemaList); - } + wrapperColumnSchemaList.add(carbonDimension.getColumnSchema()); + List childDims = carbonDimension.getListOfChildDimensions(); + if (null != childDims && childDims.size() > 0) { + fillCollumnSchemaListForComplexDims(childDims, wrapperColumnSchemaList); } } } @@ -1666,69 +1652,6 @@ public final class CarbonUtil { } /** - * This method will delete the dictionary files for the given column IDs and - * clear the dictionary cache - * - * @param dictionaryColumns - * @param carbonTable - */ - public static void deleteDictionaryFileAndCache(List dictionaryColumns, - CarbonTable carbonTable) { - if (!dictionaryColumns.isEmpty()) { - CarbonTableIdentifier carbonTableIdentifier = carbonTable.getCarbonTableIdentifier(); - CarbonTablePath carbonTablePath = - CarbonStorePath.getCarbonTablePath(carbonTable.getStorePath(), carbonTableIdentifier); - String metadataDirectoryPath = carbonTablePath.getMetadataDirectoryPath(); - CarbonFile metadataDir = FileFactory - .getCarbonFile(metadataDirectoryPath, FileFactory.getFileType(metadataDirectoryPath)); - for (final CarbonColumn column : dictionaryColumns) { - // sort index file is created with dictionary size appended to it. So all the files - // with a given column ID need to be listed - CarbonFile[] listFiles = metadataDir.listFiles(new CarbonFileFilter() { - @Override public boolean accept(CarbonFile path) { - if (path.getName().startsWith(column.getColumnId())) { - return true; - } - return false; - } - }); - for (CarbonFile file : listFiles) { - // try catch is inside for loop because even if one deletion fails, other files - // still need to be deleted - try { - FileFactory.deleteFile(file.getCanonicalPath(), - FileFactory.getFileType(file.getCanonicalPath())); - } catch (IOException e) { - LOGGER.error( - "Failed to delete dictionary or sortIndex file for column " + column.getColName() - + "with column ID " + column.getColumnId()); - } - } - // remove dictionary cache - removeDictionaryColumnFromCache(carbonTable, column.getColumnId()); - } - } - } - - /** - * This method will remove dictionary cache from driver for both reverse and forward dictionary - * - * @param carbonTable - * @param columnId - */ - public static void removeDictionaryColumnFromCache(CarbonTable carbonTable, String columnId) { - Cache dictCache = CacheProvider.getInstance() - .createCache(CacheType.REVERSE_DICTIONARY, carbonTable.getStorePath()); - DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier = - new DictionaryColumnUniqueIdentifier(carbonTable.getCarbonTableIdentifier(), - new ColumnIdentifier(columnId, null, null)); - dictCache.invalidate(dictionaryColumnUniqueIdentifier); - dictCache = CacheProvider.getInstance() - .createCache(CacheType.FORWARD_DICTIONARY, carbonTable.getStorePath()); - dictCache.invalidate(dictionaryColumnUniqueIdentifier); - } - - /** * Below method will be used to convert byte data to surrogate key based * column value size * http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index 1ba8942..3162d84 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -248,36 +248,6 @@ public final class DataTypeUtil { } /** - * This method will convert the data according to its data type and perform a - * special handling for decimal data types - * - * @param dataInBytes - * @param dimension - * @return - */ - public static Object getDataBasedOnDataType(byte[] dataInBytes, CarbonDimension dimension) { - if (null == dataInBytes || Arrays - .equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, dataInBytes)) { - return null; - } - switch (dimension.getDataType()) { - case DECIMAL: - String data = new String(dataInBytes, CarbonCommonConstants.DEFAULT_CHARSET_CLASS); - if (data.isEmpty()) { - return null; - } - java.math.BigDecimal javaDecVal = new java.math.BigDecimal(data); - if (dimension.getColumnSchema().getScale() > javaDecVal.scale()) { - javaDecVal = - javaDecVal.setScale(dimension.getColumnSchema().getScale(), RoundingMode.HALF_UP); - } - return org.apache.spark.sql.types.Decimal.apply(javaDecVal); - default: - return getDataBasedOnDataType(dataInBytes, dimension.getDataType()); - } - } - - /** * Below method will be used to convert the data passed to its actual data * type * @@ -355,16 +325,16 @@ public final class DataTypeUtil { * type * * @param dataInBytes data - * @param actualDataType actual data type + * @param dimension * @return actual data after conversion */ - public static Object getDataBasedOnDataType(byte[] dataInBytes, DataType actualDataType) { + public static Object getDataBasedOnDataType(byte[] dataInBytes, CarbonDimension dimension) { if (null == dataInBytes || Arrays .equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, dataInBytes)) { return null; } try { - switch (actualDataType) { + switch (dimension.getDataType()) { case INT: String data1 = new String(dataInBytes, CarbonCommonConstants.DEFAULT_CHARSET_CLASS); if (data1.isEmpty()) { @@ -420,6 +390,10 @@ public final class DataTypeUtil { return null; } java.math.BigDecimal javaDecVal = new java.math.BigDecimal(data7); + if (dimension.getColumnSchema().getScale() > javaDecVal.scale()) { + javaDecVal = + javaDecVal.setScale(dimension.getColumnSchema().getScale()); + } return org.apache.spark.sql.types.Decimal.apply(javaDecVal); default: return UTF8String.fromBytes(dataInBytes); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java b/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java index 09df23e..5387823 100644 --- a/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java +++ b/core/src/test/java/org/apache/carbondata/core/scan/executor/util/RestructureUtilTest.java @@ -21,7 +21,6 @@ import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; -import org.apache.carbondata.core.scan.executor.exception.QueryExecutionException; import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo; import org.apache.carbondata.core.scan.executor.infos.MeasureInfo; import org.apache.carbondata.core.scan.model.QueryDimension; @@ -87,7 +86,7 @@ public class RestructureUtilTest { List result = null; result = RestructureUtil - .createDimensionInfoAndGetUpdatedQueryDimension(blockExecutionInfo, queryDimensions, + .createDimensionInfoAndGetCurrentBlockQueryDimension(blockExecutionInfo, queryDimensions, tableBlockDimensions, tableComplexDimensions); List resultDimension = new ArrayList<>(result.size()); for (QueryDimension queryDimension : result) { @@ -125,7 +124,7 @@ public class RestructureUtilTest { queryMeasure3.setMeasure(carbonMeasure3); List queryMeasures = Arrays.asList(queryMeasure1, queryMeasure2, queryMeasure3); BlockExecutionInfo blockExecutionInfo = new BlockExecutionInfo(); - RestructureUtil.createMeasureInfoAndGetUpdatedQueryMeasures(blockExecutionInfo, queryMeasures, + RestructureUtil.createMeasureInfoAndGetCurrentBlockQueryMeasures(blockExecutionInfo, queryMeasures, currentBlockMeasures); MeasureInfo measureInfo = blockExecutionInfo.getMeasureInfo(); boolean[] measuresExist = { true, true, false }; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java b/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java index f61f1c9..04fdc84 100644 --- a/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java +++ b/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java @@ -186,11 +186,9 @@ public class CarbonCompactionExecutor { carbonTable.getDimensionByTableName(carbonTable.getFactTableName()); for (CarbonDimension dim : dimensions) { // check if dimension is deleted - if (!dim.isInvisible()) { - QueryDimension queryDimension = new QueryDimension(dim.getColName()); - queryDimension.setDimension(dim); - dims.add(queryDimension); - } + QueryDimension queryDimension = new QueryDimension(dim.getColName()); + queryDimension.setDimension(dim); + dims.add(queryDimension); } model.setQueryDimension(dims); @@ -199,11 +197,9 @@ public class CarbonCompactionExecutor { carbonTable.getMeasureByTableName(carbonTable.getFactTableName()); for (CarbonMeasure carbonMeasure : measures) { // check if measure is deleted - if (!carbonMeasure.isInvisible()) { - QueryMeasure queryMeasure = new QueryMeasure(carbonMeasure.getColName()); - queryMeasure.setMeasure(carbonMeasure); - msrs.add(queryMeasure); - } + QueryMeasure queryMeasure = new QueryMeasure(carbonMeasure.getColName()); + queryMeasure.setMeasure(carbonMeasure); + msrs.add(queryMeasure); } model.setQueryMeasures(msrs); model.setQueryId(System.nanoTime() + ""); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionUtil.java ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionUtil.java b/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionUtil.java index 5f5b149..ba9c4a8 100644 --- a/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionUtil.java +++ b/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionUtil.java @@ -311,23 +311,19 @@ public class CarbonCompactionUtil { carbonTable.getDimensionByTableName(carbonTable.getFactTableName()); List updatedCardinalityList = new ArrayList<>(columnCardinalityMap.size()); for (CarbonDimension dimension : masterDimensions) { - if (!dimension.isInvisible()) { - Integer value = columnCardinalityMap.get(dimension.getColumnId()); - if (null == value) { - updatedCardinalityList.add(getDimensionDefaultCardinality(dimension)); - } else { - updatedCardinalityList.add(value); - } - updatedColumnSchemaList.add(dimension.getColumnSchema()); + Integer value = columnCardinalityMap.get(dimension.getColumnId()); + if (null == value) { + updatedCardinalityList.add(getDimensionDefaultCardinality(dimension)); + } else { + updatedCardinalityList.add(value); } + updatedColumnSchemaList.add(dimension.getColumnSchema()); } // add measures to the column schema list List masterSchemaMeasures = carbonTable.getMeasureByTableName(carbonTable.getFactTableName()); for (CarbonMeasure measure : masterSchemaMeasures) { - if (!measure.isInvisible()) { - updatedColumnSchemaList.add(measure.getColumnSchema()); - } + updatedColumnSchemaList.add(measure.getColumnSchema()); } int[] updatedCardinality = ArrayUtils .toPrimitive(updatedCardinalityList.toArray(new Integer[updatedCardinalityList.size()])); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala index c825da6..d1532d2 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala @@ -422,16 +422,22 @@ class CarbonGlobalDictionaryGenerateRDD( val t3 = System.currentTimeMillis() val dictWriteTask = new DictionaryWriterTask(valuesBuffer, dictionaryForDistinctValueLookUp, - model, - split.index) + model.table, + model.columnIdentifier(split.index), + model.hdfsLocation, + model.primDimensions(split.index).getColumnSchema, + model.dictFileExists(split.index) + ) // execute dictionary writer task to get distinct values val distinctValues = dictWriteTask.execute() val dictWriteTime = System.currentTimeMillis() - t3 val t4 = System.currentTimeMillis() // if new data came than rewrite sort index file if (distinctValues.size() > 0) { - val sortIndexWriteTask = new SortIndexWriterTask(model, - split.index, + val sortIndexWriteTask = new SortIndexWriterTask(model.table, + model.columnIdentifier(split.index), + model.primDimensions(split.index).getDataType, + model.hdfsLocation, dictionaryForDistinctValueLookUp, distinctValues) sortIndexWriteTask.execute() http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/DictionaryWriterTask.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/DictionaryWriterTask.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/DictionaryWriterTask.scala index 3b8f899..2b1ccdf 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/DictionaryWriterTask.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/DictionaryWriterTask.scala @@ -22,22 +22,30 @@ import scala.collection.mutable import org.apache.carbondata.core.cache.dictionary.Dictionary import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.metadata.{CarbonTableIdentifier, ColumnIdentifier} +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema import org.apache.carbondata.core.service.CarbonCommonFactory import org.apache.carbondata.core.util.DataTypeUtil import org.apache.carbondata.core.writer.CarbonDictionaryWriter -import org.apache.carbondata.spark.rdd.DictionaryLoadModel /** * * @param valuesBuffer * @param dictionary - * @param model - * @param columnIndex + * @param carbonTableIdentifier + * @param columnIdentifier + * @param carbonStoreLocation + * @param columnSchema + * @param isDictionaryFileExist * @param writer */ class DictionaryWriterTask(valuesBuffer: mutable.HashSet[String], dictionary: Dictionary, - model: DictionaryLoadModel, columnIndex: Int, + carbonTableIdentifier: CarbonTableIdentifier, + columnIdentifier: ColumnIdentifier, + carbonStoreLocation: String, + columnSchema: ColumnSchema, + isDictionaryFileExist: Boolean, var writer: CarbonDictionaryWriter = null) { /** @@ -50,22 +58,22 @@ class DictionaryWriterTask(valuesBuffer: mutable.HashSet[String], java.util.Arrays.sort(values, Ordering[String]) val dictService = CarbonCommonFactory.getDictionaryService writer = dictService.getDictionaryWriter( - model.table, - model.columnIdentifier(columnIndex), - model.hdfsLocation) + carbonTableIdentifier, + columnIdentifier, + carbonStoreLocation) val distinctValues: java.util.List[String] = new java.util.ArrayList() try { - if (!model.dictFileExists(columnIndex)) { + if (!isDictionaryFileExist) { writer.write(CarbonCommonConstants.MEMBER_DEFAULT_VAL) distinctValues.add(CarbonCommonConstants.MEMBER_DEFAULT_VAL) } if (values.length >= 1) { - if (model.dictFileExists(columnIndex)) { + if (isDictionaryFileExist) { for (value <- values) { val parsedValue = DataTypeUtil.normalizeColumnValueForItsDataType(value, - model.primDimensions(columnIndex)) + columnSchema) if (null != parsedValue && dictionary.getSurrogateKey(parsedValue) == CarbonCommonConstants.INVALID_SURROGATE_KEY) { writer.write(parsedValue) @@ -76,7 +84,7 @@ class DictionaryWriterTask(valuesBuffer: mutable.HashSet[String], } else { for (value <- values) { val parsedValue = DataTypeUtil.normalizeColumnValueForItsDataType(value, - model.primDimensions(columnIndex)) + columnSchema) if (null != parsedValue) { writer.write(parsedValue) distinctValues.add(parsedValue) http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/SortIndexWriterTask.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/SortIndexWriterTask.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/SortIndexWriterTask.scala index 1504b7a..c0aa0f9 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/SortIndexWriterTask.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/tasks/SortIndexWriterTask.scala @@ -17,21 +17,27 @@ package org.apache.carbondata.spark.tasks import org.apache.carbondata.core.cache.dictionary.Dictionary +import org.apache.carbondata.core.metadata.{CarbonTableIdentifier, ColumnIdentifier} +import org.apache.carbondata.core.metadata.datatype.DataType import org.apache.carbondata.core.service.CarbonCommonFactory import org.apache.carbondata.core.writer.sortindex.{CarbonDictionarySortIndexWriter, CarbonDictionarySortInfo, CarbonDictionarySortInfoPreparator} -import org.apache.carbondata.spark.rdd.DictionaryLoadModel /** * This task writes sort index file * - * @param model - * @param index + * @param carbonTableIdentifier + * @param columnIdentifier + * @param dataType + * @param carbonStoreLocation * @param dictionary * @param distinctValues * @param carbonDictionarySortIndexWriter */ -class SortIndexWriterTask(model: DictionaryLoadModel, - index: Int, +class SortIndexWriterTask( + carbonTableIdentifier: CarbonTableIdentifier, + columnIdentifier: ColumnIdentifier, + dataType: DataType, + carbonStoreLocation: String, dictionary: Dictionary, distinctValues: java.util.List[String], var carbonDictionarySortIndexWriter: CarbonDictionarySortIndexWriter = null) { @@ -42,10 +48,10 @@ class SortIndexWriterTask(model: DictionaryLoadModel, val dictService = CarbonCommonFactory.getDictionaryService val dictionarySortInfo: CarbonDictionarySortInfo = preparator.getDictionarySortInfo(distinctValues, dictionary, - model.primDimensions(index).getDataType) + dataType) carbonDictionarySortIndexWriter = - dictService.getDictionarySortIndexWriter(model.table, model.columnIdentifier(index), - model.hdfsLocation) + dictService.getDictionarySortIndexWriter(carbonTableIdentifier, columnIdentifier, + carbonStoreLocation) carbonDictionarySortIndexWriter.writeSortIndex(dictionarySortInfo.getSortIndex) carbonDictionarySortIndexWriter .writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted) http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala index bcb7ff7..dc131b1 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala @@ -22,6 +22,7 @@ import java.nio.charset.Charset import java.util.regex.Pattern import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.language.implicitConversions import scala.util.control.Breaks.{break, breakable} @@ -59,6 +60,7 @@ import org.apache.carbondata.processing.model.CarbonLoadModel import org.apache.carbondata.spark.CarbonSparkFactory import org.apache.carbondata.spark.load.CarbonLoaderUtil import org.apache.carbondata.spark.rdd._ +import org.apache.carbondata.spark.tasks.{DictionaryWriterTask, SortIndexWriterTask} /** * A object which provide a method to generate global dictionary from CSV files. @@ -814,22 +816,20 @@ object GlobalDictionaryUtil { val columnIdentifier = new ColumnIdentifier(columnSchema.getColumnUniqueId, null, columnSchema.getDataType) - val writer = CarbonCommonFactory.getDictionaryService - .getDictionaryWriter(tableIdentifier, columnIdentifier, storePath) - - val distinctValues: java.util.List[String] = new java.util.ArrayList() - writer.write(CarbonCommonConstants.MEMBER_DEFAULT_VAL) - distinctValues.add(CarbonCommonConstants.MEMBER_DEFAULT_VAL) - val parsedValue = DataTypeUtil.normalizeColumnValueForItsDataType(defaultValue, columnSchema) + val valuesBuffer = new mutable.HashSet[String] if (null != parsedValue) { - writer.write(parsedValue) - distinctValues.add(parsedValue) + valuesBuffer += parsedValue } - if (null != writer) { - writer.close() - } - + val dictWriteTask = new DictionaryWriterTask(valuesBuffer, + dictionary, + tableIdentifier, + columnIdentifier, + storePath, + columnSchema, + false + ) + val distinctValues = dictWriteTask.execute LOGGER.info(s"Dictionary file writing is successful for new column ${ columnSchema.getColumnName }") @@ -840,17 +840,13 @@ object GlobalDictionaryUtil { storePath, columnSchema.getDataType ) - val preparator: CarbonDictionarySortInfoPreparator = new CarbonDictionarySortInfoPreparator - val dictService = CarbonCommonFactory.getDictionaryService - val dictionarySortInfo: CarbonDictionarySortInfo = - preparator.getDictionarySortInfo(distinctValues, dictionary, - columnSchema.getDataType) - carbonDictionarySortIndexWriter = - dictService.getDictionarySortIndexWriter(tableIdentifier, columnIdentifier, - storePath) - carbonDictionarySortIndexWriter.writeSortIndex(dictionarySortInfo.getSortIndex) - carbonDictionarySortIndexWriter - .writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted) + val sortIndexWriteTask = new SortIndexWriterTask(tableIdentifier, + columnIdentifier, + columnSchema.getDataType, + storePath, + dictionary, + distinctValues) + sortIndexWriteTask.execute() } if (null != carbonDictionarySortIndexWriter) { @@ -861,9 +857,8 @@ object GlobalDictionaryUtil { columnSchema.getColumnName }") - if (null != writer) { - writer.commit() - } + // After sortIndex writing, update dictionaryMeta + dictWriteTask.updateMetaData() LOGGER.info(s"Dictionary meta file writing is successful for new column ${ columnSchema.getColumnName http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java b/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java index 6d969c8..2a9c701 100644 --- a/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java +++ b/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java @@ -34,26 +34,56 @@ class ColumnarVectorWrapper implements CarbonColumnVector { columnVector.putShort(rowId, value); } + @Override public void putShorts(int rowId, int count, short value) { + columnVector.putShorts(rowId, count, value); + } + @Override public void putInt(int rowId, int value) { columnVector.putInt(rowId, value); } + @Override public void putInts(int rowId, int count, int value) { + columnVector.putInts(rowId, count, value); + } + @Override public void putLong(int rowId, long value) { columnVector.putLong(rowId, value); } + @Override public void putLongs(int rowId, int count, long value) { + columnVector.putLongs(rowId, count, value); + } + @Override public void putDecimal(int rowId, Decimal value, int precision) { columnVector.putDecimal(rowId, value, precision); } + @Override public void putDecimals(int rowId, int count, Decimal value, int precision) { + for (int i = 0; i < count; i++) { + rowId += i; + putDecimal(rowId, value, precision); + } + } + @Override public void putDouble(int rowId, double value) { columnVector.putDouble(rowId, value); } + @Override public void putDoubles(int rowId, int count, double value) { + columnVector.putDoubles(rowId, count, value); + } + @Override public void putBytes(int rowId, byte[] value) { columnVector.putByteArray(rowId, value); } + @Override public void putBytes(int rowId, int count, byte[] value) { + for (int i = 0; i < count; i++) { + rowId += i; + putBytes(rowId, value); + } + } + @Override public void putBytes(int rowId, int offset, int length, byte[] value) { columnVector.putByteArray(rowId, value, offset, length); } @@ -62,6 +92,10 @@ class ColumnarVectorWrapper implements CarbonColumnVector { columnVector.putNull(rowId); } + @Override public void putNulls(int rowId, int count) { + columnVector.putNulls(rowId, count); + } + @Override public boolean isNull(int rowId) { return columnVector.isNullAt(rowId); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala index f92b1e3..6c61f93 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala @@ -279,7 +279,7 @@ class CarbonDecoderRDD( } val getDictionaryColumnIds = { - val dictIds: Array[(String, ColumnIdentifier, DataType)] = output.map { a => + val dictIds: Array[(String, ColumnIdentifier, CarbonDimension)] = output.map { a => val attr = aliasMap.getOrElse(a, a) val relation = relations.find(p => p.contains(attr)) if (relation.isDefined && canBeDecoded(attr)) { @@ -291,7 +291,7 @@ class CarbonDecoderRDD( !carbonDimension.hasEncoding(Encoding.DIRECT_DICTIONARY) && !carbonDimension.isComplex()) { (carbonTable.getFactTableName, carbonDimension.getColumnIdentifier, - carbonDimension.getDataType) + carbonDimension) } else { (null, null, null) } @@ -355,7 +355,7 @@ class CarbonDecoderRDD( try { cache.get(new DictionaryColumnUniqueIdentifier( atiMap(f._1).getCarbonTableIdentifier, - f._2, f._3)) + f._2, f._3.getDataType)) } catch { case _: Throwable => null } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 1b8b34d..b9160b3 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -37,6 +37,7 @@ import org.codehaus.jackson.map.ObjectMapper import org.apache.carbondata.api.CarbonStore import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.cache.dictionary.ManageDictionary import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.dictionary.server.DictionaryServer @@ -520,7 +521,7 @@ private[sql] case class AlterTableDropColumns( sparkSession.catalog.refreshTable(tableIdentifier.quotedString) // TODO: 1. add check for deletion of index tables // delete dictionary files for dictionary column and clear dictionary cache from memory - CarbonUtil.deleteDictionaryFileAndCache(dictionaryColumns.toList.asJava, carbonTable) + ManageDictionary.deleteDictionaryFileAndCache(dictionaryColumns.toList.asJava, carbonTable) LOGGER.info(s"Alter table for drop columns is successful for table $dbName.$tableName") LOGGER.audit(s"Alter table for drop columns is successful for table $dbName.$tableName") } catch { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b3b16c5/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala index 145a2b5..8c4f9f1 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala @@ -813,7 +813,7 @@ case class CarbonRelation( val sett = new LinkedHashSet( tableMeta.carbonTable.getDimensionByTableName(tableMeta.carbonTableIdentifier.getTableName) .asScala.asJava) - sett.asScala.toSeq.filter(!_.getColumnSchema.isInvisible).map(dim => { + sett.asScala.toSeq.map(dim => { val dimval = metaData.carbonTable .getDimensionByName(metaData.carbonTable.getFactTableName, dim.getColName) val output: DataType = dimval.getDataType @@ -839,7 +839,7 @@ case class CarbonRelation( new LinkedHashSet( tableMeta.carbonTable. getMeasureByTableName(tableMeta.carbonTable.getFactTableName). - asScala.asJava).asScala.toSeq.filter(!_.getColumnSchema.isInvisible) + asScala.asJava).asScala.toSeq .map(x => AttributeReference(x.getColName, CarbonMetastoreTypes.toDataType( metaData.carbonTable.getMeasureByName(factTable, x.getColName).getDataType.toString .toLowerCase match {