Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 4A9CD200B35 for ; Tue, 5 Jul 2016 14:07:59 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 4915E160A60; Tue, 5 Jul 2016 12:07:59 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 93706160A4F for ; Tue, 5 Jul 2016 14:07:57 +0200 (CEST) Received: (qmail 47501 invoked by uid 500); 5 Jul 2016 12:07:56 -0000 Mailing-List: contact commits-help@carbondata.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@carbondata.incubator.apache.org Delivered-To: mailing list commits@carbondata.incubator.apache.org Received: (qmail 47492 invoked by uid 99); 5 Jul 2016 12:07:56 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 05 Jul 2016 12:07:56 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id 028881A5DD7 for ; Tue, 5 Jul 2016 12:07:56 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -4.646 X-Spam-Level: X-Spam-Status: No, score=-4.646 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-1.426] autolearn=disabled Received: from mx2-lw-us.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id Ok23nRRtzBt9 for ; Tue, 5 Jul 2016 12:07:48 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx2-lw-us.apache.org (ASF Mail Server at mx2-lw-us.apache.org) with SMTP id 7CB475F23D for ; Tue, 5 Jul 2016 12:07:47 +0000 (UTC) Received: (qmail 47452 invoked by uid 99); 5 Jul 2016 12:07:46 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 05 Jul 2016 12:07:46 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 7CD8AE07F6; Tue, 5 Jul 2016 12:07:46 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: vimaldas@apache.org To: commits@carbondata.incubator.apache.org Date: Tue, 05 Jul 2016 12:07:46 -0000 Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [1/2] incubator-carbondata git commit: [CARBONDATA-29] Make inverted index can be configurable archived-at: Tue, 05 Jul 2016 12:07:59 -0000 Repository: incubator-carbondata Updated Branches: refs/heads/master 60094c183 -> 5e7fb7c1e [CARBONDATA-29] Make inverted index can be configurable Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/4c67a7f4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/4c67a7f4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/4c67a7f4 Branch: refs/heads/master Commit: 4c67a7f402a6cf18283a7503833d12b8a115a2b2 Parents: 60094c1 Author: Zhangshunyu Authored: Tue Jul 5 17:28:05 2016 +0530 Committer: vimaldas Committed: Tue Jul 5 17:28:05 2016 +0530 ---------------------------------------------------------------------- .../schema/table/column/CarbonColumn.java | 6 + .../schema/table/column/ColumnSchema.java | 19 +++ .../BlockIndexerStorageForNoInvertedIndex.java | 159 +++++++++++++++++++ .../executer/ExcludeFilterExecuterImpl.java | 20 +-- .../executer/IncludeFilterExecuterImpl.java | 20 +-- .../org/apache/spark/sql/CarbonSqlParser.scala | 46 +++++- .../execution/command/carbonTableSchema.scala | 14 ++ .../TestNoInvertedIndexLoadAndQuery.scala | 66 ++++++++ .../graphgenerator/GraphGenerator.java | 23 ++- .../configuration/GraphConfigurationInfo.java | 16 ++ .../processing/mdkeygen/MDKeyGenStep.java | 9 ++ .../processing/mdkeygen/MDKeyGenStepMeta.java | 21 +++ .../store/CarbonFactDataHandlerColumnar.java | 37 ++++- .../store/CarbonFactDataHandlerModel.java | 11 ++ 14 files changed, 419 insertions(+), 48 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/CarbonColumn.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/CarbonColumn.java b/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/CarbonColumn.java index 04008ab..90d1869 100644 --- a/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/CarbonColumn.java +++ b/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/CarbonColumn.java @@ -148,6 +148,12 @@ public class CarbonColumn implements Serializable { return columnSchema.isDimensionColumn(); } + /** + * @return if column use inverted index return true, else false. + */ + public Boolean isUseInvertedIndnex() { + return columnSchema.isUseInvertedIndex(); + } public ColumnSchema getColumnSchema() { return this.columnSchema; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/ColumnSchema.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/ColumnSchema.java b/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/ColumnSchema.java index 5c54ad3..cc0bce9 100644 --- a/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/ColumnSchema.java +++ b/core/src/main/java/org/carbondata/core/carbon/metadata/schema/table/column/ColumnSchema.java @@ -75,6 +75,11 @@ public class ColumnSchema implements Serializable { private boolean isDimensionColumn; /** + * Whether the column should use inverted index + */ + private boolean useInvertedIndex; + + /** * The group ID for column used for row format columns, * where in columns in each group are chunked together. */ @@ -172,6 +177,20 @@ public class ColumnSchema implements Serializable { } /** + * the isUseInvertedIndex + */ + public boolean isUseInvertedIndex() { + return useInvertedIndex; + } + + /** + * @param useInvertedIndex the useInvertedIndex to set + */ + public void setUseInvertedIndex(boolean useInvertedIndex) { + this.useInvertedIndex = useInvertedIndex; + } + + /** * @return the columnGroup */ public int getColumnGroupId() { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/core/src/main/java/org/carbondata/core/datastorage/store/columnar/BlockIndexerStorageForNoInvertedIndex.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/carbondata/core/datastorage/store/columnar/BlockIndexerStorageForNoInvertedIndex.java b/core/src/main/java/org/carbondata/core/datastorage/store/columnar/BlockIndexerStorageForNoInvertedIndex.java new file mode 100644 index 0000000..045508f --- /dev/null +++ b/core/src/main/java/org/carbondata/core/datastorage/store/columnar/BlockIndexerStorageForNoInvertedIndex.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.carbondata.core.datastorage.store.columnar; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + +import org.carbondata.core.constants.CarbonCommonConstants; +import org.carbondata.core.util.ByteUtil; + +public class BlockIndexerStorageForNoInvertedIndex implements IndexStorage { + private byte[][] keyBlock; + private byte[][] sortedBlock; + private int totalSize; + private int[] dataIndexMap; + + public BlockIndexerStorageForNoInvertedIndex(byte[][] keyBlockInput, boolean compressData, + boolean isNoDictionary) { + // without invertedindex but can be RLE + if (compressData) { + // with RLE + byte[] prvKey = keyBlockInput[0]; + List list = new ArrayList(CarbonCommonConstants.CONSTANT_SIZE_TEN); + list.add(keyBlockInput[0]); + int counter = 1; + int start = 0; + List map = new ArrayList(CarbonCommonConstants.CONSTANT_SIZE_TEN); + int length = keyBlockInput.length; + for(int i = 1; i < length; i++) { + if (ByteUtil.UnsafeComparer.INSTANCE.compareTo(prvKey, keyBlockInput[i]) != 0) { + prvKey = keyBlockInput[i]; + list.add(keyBlockInput[i]); + map.add(start); + map.add(counter); + start += counter; + counter = 1; + continue; + } + counter++; + } + map.add(start); + map.add(counter); + this.keyBlock = convertToKeyArray(list); + if (keyBlockInput.length == this.keyBlock.length) { + dataIndexMap = new int[0]; + } else { + dataIndexMap = convertToArray(map); + } + } else { + this.keyBlock = keyBlockInput; + dataIndexMap = new int[0]; + } + + this.sortedBlock = new byte[keyBlock.length][]; + System.arraycopy(keyBlock, 0, sortedBlock, 0, keyBlock.length); + if (isNoDictionary) { + Arrays.sort(sortedBlock, new Comparator() { + @Override + public int compare(byte[] col1, byte[] col2) { + return ByteUtil.UnsafeComparer.INSTANCE + .compareTo(col1, 2, col1.length - 2, col2, 2, col2.length - 2); + } + }); + } else { + Arrays.sort(sortedBlock, new Comparator() { + @Override + public int compare(byte[] col1, byte[] col2) { + return ByteUtil.UnsafeComparer.INSTANCE.compareTo(col1, col2); + } + }); + } + + } + + private int[] convertToArray(List list) { + int[] shortArray = new int[list.size()]; + for(int i = 0; i < shortArray.length; i++) { + shortArray[i] = list.get(i); + } + return shortArray; + } + + private byte[][] convertToKeyArray(List list) { + byte[][] shortArray = new byte[list.size()][]; + for (int i = 0; i < shortArray.length; i++) { + shortArray[i] = list.get(i); + totalSize += shortArray[i].length; + } + return shortArray; + } + + @Override + public int[] getDataIndexMap() { + return dataIndexMap; + } + + @Override + public int getTotalSize() { + return totalSize; + } + + @Override + public boolean isAlreadySorted() { + return true; + } + + /** + * no use + * @return + */ + @Override + public int[] getDataAfterComp() { + return new int[0]; + } + + /** + * no use + * @return + */ + @Override + public int[] getIndexMap() { + return new int[0]; + } + + /** + * @return the keyBlock + */ + public byte[][] getKeyBlock() { + return keyBlock; + } + + @Override public byte[] getMin() { + return sortedBlock[0]; + } + + @Override public byte[] getMax() { + return sortedBlock[sortedBlock.length - 1]; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/core/src/main/java/org/carbondata/scan/filter/executer/ExcludeFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/carbondata/scan/filter/executer/ExcludeFilterExecuterImpl.java b/core/src/main/java/org/carbondata/scan/filter/executer/ExcludeFilterExecuterImpl.java index 2bc5450..43ea2e5 100644 --- a/core/src/main/java/org/carbondata/scan/filter/executer/ExcludeFilterExecuterImpl.java +++ b/core/src/main/java/org/carbondata/scan/filter/executer/ExcludeFilterExecuterImpl.java @@ -166,34 +166,16 @@ public class ExcludeFilterExecuterImpl implements FilterExecuter { private BitSet setFilterdIndexToBitSet(FixedLengthDimensionDataChunk dimColumnDataChunk, int numerOfRows) { BitSet bitSet = new BitSet(numerOfRows); - int startKey = 0; - int last = 0; bitSet.flip(0, numerOfRows); - int startIndex = 0; byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); for (int k = 0; k < filterValues.length; k++) { - startKey = CarbonUtil - .getFirstIndexUsingBinarySearch(dimColumnDataChunk, startIndex, numerOfRows - 1, - filterValues[k], false); - if (startKey < 0) { - continue; - } - bitSet.flip(startKey); - last = startKey; - for (int j = startKey + 1; j < numerOfRows; j++) { + for (int j = 0; j < numerOfRows; j++) { if (ByteUtil.UnsafeComparer.INSTANCE .compareTo(dimColumnDataChunk.getCompleteDataChunk(), j * filterValues[k].length, filterValues[k].length, filterValues[k], 0, filterValues[k].length) == 0) { bitSet.flip(j); - last++; - } else { - break; } } - startIndex = last; - if (startIndex >= numerOfRows) { - break; - } } return bitSet; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/core/src/main/java/org/carbondata/scan/filter/executer/IncludeFilterExecuterImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/carbondata/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/carbondata/scan/filter/executer/IncludeFilterExecuterImpl.java index ff6ecd2..14a4c3b 100644 --- a/core/src/main/java/org/carbondata/scan/filter/executer/IncludeFilterExecuterImpl.java +++ b/core/src/main/java/org/carbondata/scan/filter/executer/IncludeFilterExecuterImpl.java @@ -159,33 +159,15 @@ public class IncludeFilterExecuterImpl implements FilterExecuter { if (dimensionColumnDataChunk instanceof FixedLengthDimensionDataChunk) { FixedLengthDimensionDataChunk fixedDimensionChunk = (FixedLengthDimensionDataChunk) dimensionColumnDataChunk; - int start = 0; - int last = 0; - int startIndex = 0; byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); for (int k = 0; k < filterValues.length; k++) { - start = CarbonUtil.getFirstIndexUsingBinarySearch( - (FixedLengthDimensionDataChunk) dimensionColumnDataChunk, startIndex, numerOfRows - 1, - filterValues[k], false); - if (start < 0) { - continue; - } - bitSet.set(start); - last = start; - for (int j = start + 1; j < numerOfRows; j++) { + for (int j = 0; j < numerOfRows; j++) { if (ByteUtil.UnsafeComparer.INSTANCE .compareTo(fixedDimensionChunk.getCompleteDataChunk(), j * filterValues[k].length, filterValues[k].length, filterValues[k], 0, filterValues[k].length) == 0) { bitSet.set(j); - last++; - } else { - break; } } - startIndex = last; - if (startIndex >= numerOfRows) { - break; - } } } return bitSet; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala index 8536544..6164f54 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala @@ -479,13 +479,17 @@ class CarbonSqlParser() val groupCols: Seq[String] = updateColumnGroupsInField(tableProperties, noDictionaryDims, msrs, dims) + // get no inverted index columns from table properties. + val noInvertedIdxCols = extractNoInvertedIndexColumns(fields, tableProperties) + val partitioner: Option[Partitioner] = getPartitionerObject(partitionCols, tableProperties) tableModel(ifNotExistPresent, dbName.getOrElse("default"), dbName, tableName, reorderDimensions(dims.map(f => normalizeType(f)).map(f => addParent(f))), msrs.map(f => normalizeType(f)), "", null, "", - None, Seq(), null, Option(noDictionaryDims), null, partitioner, groupCols, Some(colProps)) + None, Seq(), null, Option(noDictionaryDims), Option(noInvertedIdxCols), null, partitioner, + groupCols, Some(colProps)) } /** @@ -647,6 +651,45 @@ class CarbonSqlParser() } } /** + * This will extract the no inverted columns fields. + * By default all dimensions use inverted index. + * + * @param fields + * @param tableProperties + * @return + */ + protected def extractNoInvertedIndexColumns(fields: Seq[Field], + tableProperties: Map[String, String]): + Seq[String] = { + // check whether the column name is in fields + var noInvertedIdxColsProps: Array[String] = Array[String]() + var noInvertedIdxCols: Seq[String] = Seq[String]() + + if (tableProperties.get("NO_INVERTED_INDEX").isDefined) { + noInvertedIdxColsProps = + tableProperties.get("NO_INVERTED_INDEX").get.split(',').map(_.trim) + noInvertedIdxColsProps + .map { noInvertedIdxColProp => + if (!fields.exists(x => x.column.equalsIgnoreCase(noInvertedIdxColProp))) { + val errormsg = "NO_INVERTED_INDEX column: " + noInvertedIdxColProp + + " does not exist in table. Please check create table statement." + throw new MalformedCarbonCommandException(errormsg) + } + } + } + // check duplicate columns and only 1 col left + val distinctCols = noInvertedIdxColsProps.toSet + // extract the no inverted index columns + fields.foreach( field => { + if (distinctCols.exists(x => x.equalsIgnoreCase(field.column))) { + noInvertedIdxCols :+= field.column + } + } + ) + noInvertedIdxCols + } + + /** * This will extract the Dimensions and NoDictionary Dimensions fields. * By default all string cols are dimensions. * @@ -881,7 +924,6 @@ class CarbonSqlParser() case other => other } - protected lazy val loadDataNew: Parser[LogicalPlan] = LOAD ~> DATA ~> opt(LOCAL) ~> INPATH ~> stringLit ~ opt(OVERWRITE) ~ (INTO ~> TABLE ~> (ident <~ ".").? ~ ident) ~ http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 4a6551b..826ac7a 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -75,6 +75,7 @@ case class tableModel( dimRelations: Seq[DimensionRelation], simpleDimRelations: Seq[DimensionRelation], highcardinalitydims: Option[Seq[String]], + noInvertedIdxCols: Option[Seq[String]], aggregation: Seq[Aggregation], partitioner: Option[Partitioner], columnGroups: Seq[String], @@ -298,6 +299,19 @@ class TableNewProcessor(cm: tableModel, sqlContext: SQLContext) { } + // Setting the boolen value of useInvertedIndex in column shcehma + val noInvertedIndexCols = cm.noInvertedIdxCols.getOrElse(Seq()) + for (column <- allColumns) { + // When the column is measure or the specified no inverted index column in DDL, + // set useInvertedIndex to false, otherwise true. + if (noInvertedIndexCols.contains(column.getColumnName) || + cm.msrCols.exists(_.column.equalsIgnoreCase(column.getColumnName))) { + column.setUseInvertedIndex(false) + } else { + column.setUseInvertedIndex(true) + } + } + // Adding dummy measure if no measure is provided if (measures.size < 1) { val encoders = new java.util.ArrayList[Encoding]() http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala ---------------------------------------------------------------------- diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala new file mode 100644 index 0000000..44f505d --- /dev/null +++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestNoInvertedIndexLoadAndQuery.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.carbondata.spark.testsuite.dataload + +import java.io.File + +import org.apache.spark.sql.common.util.CarbonHiveContext._ +import org.apache.spark.sql.common.util.QueryTest +import org.apache.spark.sql.Row +import org.scalatest.BeforeAndAfterAll + +/** + * Test Class for no inverted index load and query + * + */ + +class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll{ + + def currentPath: String = new File(this.getClass.getResource("/").getPath + "/../../") + .getCanonicalPath + val testData = new File(currentPath + "/../../examples/src/main/resources/dimSample.csv") + .getCanonicalPath + override def beforeAll { + sql("DROP TABLE IF EXISTS index") + } + + test("no inverted index load and query") { + + sql(""" + CREATE TABLE IF NOT EXISTS index + (id Int, name String, city String) + STORED BY 'org.apache.carbondata.format' + TBLPROPERTIES('NO_INVERTED_INDEX'='name,city') + """) + sql(s""" + LOAD DATA LOCAL INPATH '$testData' into table index + """) + checkAnswer( + sql(""" + SELECT * FROM index WHERE city = "Bangalore" + """), + Seq(Row("Emily", "Bangalore", 19.0))) + + } + + override def afterAll { + sql("drop table index") + } +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java b/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java index 435180b..f61e5dd 100644 --- a/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java +++ b/processing/src/main/java/org/carbondata/processing/graphgenerator/GraphGenerator.java @@ -341,7 +341,6 @@ public class GraphGenerator { inputStep = getTableInputStep(configurationInfo); selectValueToChangeTheDataType = getSelectValueToChangeTheDataType(configurationInfo, 1); } - carbonSurrogateKeyStep = getCarbonCSVBasedSurrogateKeyStep(configurationInfo); StepMeta sortStep = getSortStep(configurationInfo); StepMeta carbonMDKeyStep = getMDKeyStep(configurationInfo); @@ -597,6 +596,8 @@ public class GraphGenerator { private StepMeta getMDKeyStep(GraphConfigurationInfo graphConfiguration) { MDKeyGenStepMeta carbonMdKey = new MDKeyGenStepMeta(); + carbonMdKey.setIsUseInvertedIndex(RemoveDictionaryUtil + .convertBooleanArrToString(graphConfiguration.getIsUseInvertedIndex())); carbonMdKey.setPartitionID(partitionID); carbonMdKey.setSegmentId(segmentId); carbonMdKey.setNumberOfCores(graphConfiguration.getNumberOfCores()); @@ -784,6 +785,7 @@ public class GraphGenerator { graphConfiguration.setCurrentRestructNumber(currentRestructNumber); List dimensions = carbonDataLoadSchema.getCarbonTable() .getDimensionByTableName(carbonDataLoadSchema.getCarbonTable().getFactTableName()); + prepareIsUseInvertedIndex(dimensions, graphConfiguration); graphConfiguration .setDimensions(CarbonSchemaParser.getCubeDimensions(dimensions, carbonDataLoadSchema)); graphConfiguration @@ -948,4 +950,23 @@ public class GraphGenerator { graphConfig.setIsNoDictionaryDimMapping( noDictionaryMapping.toArray(new Boolean[noDictionaryMapping.size()])); } + /** + * Preparing the boolean [] to map whether the dimension use inverted index or not. + * + * @param dims + * @param graphConfig + */ + private void prepareIsUseInvertedIndex(List dims, + GraphConfigurationInfo graphConfig) { + List isUseInvertedIndexList = new ArrayList(); + for (CarbonDimension dimension : dims) { + if(dimension.isUseInvertedIndnex()) { + isUseInvertedIndexList.add(true); + } else { + isUseInvertedIndexList.add(false); + } + } + graphConfig.setIsUseInvertedIndex(isUseInvertedIndexList + .toArray(new Boolean[isUseInvertedIndexList.size()])); + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/processing/src/main/java/org/carbondata/processing/graphgenerator/configuration/GraphConfigurationInfo.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/carbondata/processing/graphgenerator/configuration/GraphConfigurationInfo.java b/processing/src/main/java/org/carbondata/processing/graphgenerator/configuration/GraphConfigurationInfo.java index 8ab37a6..1b2afa5 100644 --- a/processing/src/main/java/org/carbondata/processing/graphgenerator/configuration/GraphConfigurationInfo.java +++ b/processing/src/main/java/org/carbondata/processing/graphgenerator/configuration/GraphConfigurationInfo.java @@ -190,6 +190,8 @@ public class GraphConfigurationInfo { private Boolean[] isNoDictionaryDimMapping; + private Boolean[] isUseInvertedIndex; + private String columnPropertiesString; /** @@ -207,6 +209,20 @@ public class GraphConfigurationInfo { private String columnGroupsString; private String columnsDataTypeString; /** + * @return isUseInvertedIndex + */ + public Boolean[] getIsUseInvertedIndex() { + return isUseInvertedIndex; + } + + /** + * @param isUseInvertedIndex the bool array whether use inverted index to set + */ + public void setIsUseInvertedIndex(Boolean[] isUseInvertedIndex) { + this.isUseInvertedIndex = isUseInvertedIndex; + } + + /** * @return the connectionName */ public String getConnectionName() { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStep.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStep.java b/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStep.java index de35082..1b89823 100644 --- a/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStep.java +++ b/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStep.java @@ -139,6 +139,12 @@ public class MDKeyGenStep extends BaseStep { private boolean[] isNoDictionaryDimension; /** + * to check whether dimension use inverted index + * or not + */ + private boolean[] isUseInvertedIndex; + + /** * CarbonMDKeyGenStep * * @param stepMeta @@ -250,6 +256,8 @@ public class MDKeyGenStep extends BaseStep { String.valueOf(meta.getTaskNo()), meta.getPartitionID(), meta.getSegmentId()+""); isNoDictionaryDimension = RemoveDictionaryUtil.convertStringToBooleanArr(meta.getNoDictionaryDimsMapping()); + isUseInvertedIndex = + RemoveDictionaryUtil.convertStringToBooleanArr(meta.getIsUseInvertedIndex()); fileManager = new FileManager(); fileManager.setName(CarbonCommonConstants.LOAD_FOLDER + meta.getSegmentId() + CarbonCommonConstants.FILE_INPROGRESS_STATUS); @@ -352,6 +360,7 @@ public class MDKeyGenStep extends BaseStep { carbonFactDataHandlerModel.setPrimitiveDimLens(simpleDimsLen); carbonFactDataHandlerModel.setCarbonDataFileAttributes(carbonDataFileAttributes); carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath); + carbonFactDataHandlerModel.setIsUseInvertedIndex(isUseInvertedIndex); if (meta.getNoDictionaryCount() > 0 || meta.getComplexDimsCount() > 0) { carbonFactDataHandlerModel.setMdKeyIndex(measureCount + 1); } else { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java b/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java index 8bed860..9b65e63 100644 --- a/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java +++ b/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java @@ -140,6 +140,10 @@ public class MDKeyGenStepMeta extends BaseStepMeta implements StepMetaInterface * To determine the column whether is dictionary or not. */ private String noDictionaryDimsMapping; + /** + * To determine the column whether use inverted index or not. + */ + private String isUseInvertedIndex; /** * Constructor @@ -186,6 +190,7 @@ public class MDKeyGenStepMeta extends BaseStepMeta implements StepMetaInterface retval.append(" ").append(XMLHandler.addTagValue("factTimeStamp", factTimeStamp)); retval.append(" ").append(XMLHandler.addTagValue("factTimeStamp", factTimeStamp)); retval.append(" ").append(XMLHandler.addTagValue("partitionID", partitionID)); + retval.append(" ").append(XMLHandler.addTagValue("isUseInvertedIndex", isUseInvertedIndex)); retval.append(" ").append(XMLHandler.addTagValue("segmentId", segmentId)); retval.append(" ") .append(XMLHandler.addTagValue("noDictionaryDimsMapping", noDictionaryDimsMapping)); @@ -213,6 +218,7 @@ public class MDKeyGenStepMeta extends BaseStepMeta implements StepMetaInterface taskNo = XMLHandler.getTagValue(stepnode, "taskNo"); factTimeStamp = XMLHandler.getTagValue(stepnode, "factTimeStamp"); partitionID = XMLHandler.getTagValue(stepnode, "partitionID"); + isUseInvertedIndex = XMLHandler.getTagValue(stepnode, "isUseInvertedIndex"); segmentId = XMLHandler.getTagValue(stepnode, "segmentId"); noDictionaryDimsMapping = XMLHandler.getTagValue(stepnode, "noDictionaryDimsMapping"); } catch (Exception e) { @@ -240,6 +246,7 @@ public class MDKeyGenStepMeta extends BaseStepMeta implements StepMetaInterface rep.saveStepAttribute(idTransformation, idStep, "taskNo", taskNo); rep.saveStepAttribute(idTransformation, idStep, "factTimeStamp", factTimeStamp); rep.saveStepAttribute(idTransformation, idStep, "partitionID", partitionID); + rep.saveStepAttribute(idTransformation, idStep, "isUseInvertedIndex", isUseInvertedIndex); rep.saveStepAttribute(idTransformation, idStep, "segmentId", segmentId); rep.saveStepAttribute(idTransformation, idStep, "noDictionaryDimsMapping", noDictionaryDimsMapping); @@ -270,6 +277,7 @@ public class MDKeyGenStepMeta extends BaseStepMeta implements StepMetaInterface taskNo = rep.getStepAttributeString(idStep, "taskNo"); factTimeStamp = rep.getStepAttributeString(idStep, "factTimeStamp"); partitionID = rep.getStepAttributeString(idStep, "partitionID"); + isUseInvertedIndex = rep.getStepAttributeString(idStep, "isUseInvertedIndex"); segmentId = rep.getStepAttributeString(idStep, "segmentId"); noDictionaryDimsMapping = rep.getStepAttributeString(idStep, "noDictionaryDimsMapping"); } catch (Exception e) { @@ -576,4 +584,17 @@ public class MDKeyGenStepMeta extends BaseStepMeta implements StepMetaInterface public void setNoDictionaryDimsMapping(String noDictionaryDimsMapping) { this.noDictionaryDimsMapping = noDictionaryDimsMapping; } + /** + * @return isUseInvertedIndex + */ + public String getIsUseInvertedIndex() { + return isUseInvertedIndex; + } + + /** + * @param isUseInvertedIndex the bool array whether use inverted index to set + */ + public void setIsUseInvertedIndex(String isUseInvertedIndex) { + this.isUseInvertedIndex = isUseInvertedIndex; + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerColumnar.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerColumnar.java index 5277906..7b94dda 100644 --- a/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerColumnar.java +++ b/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerColumnar.java @@ -47,6 +47,7 @@ import org.carbondata.core.carbon.metadata.schema.table.CarbonTable; import org.carbondata.core.carbon.metadata.schema.table.column.ColumnSchema; import org.carbondata.core.constants.CarbonCommonConstants; import org.carbondata.core.datastorage.store.columnar.BlockIndexerStorageForInt; +import org.carbondata.core.datastorage.store.columnar.BlockIndexerStorageForNoInvertedIndex; import org.carbondata.core.datastorage.store.columnar.ColumnGroupModel; import org.carbondata.core.datastorage.store.columnar.IndexStorage; import org.carbondata.core.datastorage.store.compression.ValueCompressionModel; @@ -153,6 +154,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { private boolean[] aggKeyBlock; private boolean[] isNoDictionary; private boolean isAggKeyBlock; + private boolean enableInvertedIndex; private long processedDataCount; /** * thread pool size to be used for block sort @@ -190,6 +192,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { private int[] primitiveDimLens; private char[] type; private int[] completeDimLens; + private boolean[] isUseInvertedIndex; /** * data file attributes which will used for file construction */ @@ -276,6 +279,16 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { this.aggKeyBlock = new boolean[columnStoreCount]; this.isNoDictionary = new boolean[columnStoreCount]; + this.isUseInvertedIndex = new boolean[columnStoreCount]; + if (null != carbonFactDataHandlerModel.getIsUseInvertedIndex()) { + for (int i = 0; i < isUseInvertedIndex.length; i++) { + if (i < carbonFactDataHandlerModel.getIsUseInvertedIndex().length) { + isUseInvertedIndex[i] = carbonFactDataHandlerModel.getIsUseInvertedIndex()[i]; + } else { + isUseInvertedIndex[i] = true; + } + } + } int noDictStartIndex = this.colGrpModel.getNoOfColumnStore(); // setting true value for dims of high card for (int i = 0; i < noDictionaryCount; i++) { @@ -670,7 +683,8 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { dictionaryColumnCount++; if (colGrpModel.isColumnar(dictionaryColumnCount)) { submit.add(executorService - .submit(new BlockSortThread(i, dataHolders[dictionaryColumnCount].getData(), true))); + .submit(new BlockSortThread(i, dataHolders[dictionaryColumnCount].getData(), + true, isUseInvertedIndex[i]))); } else { submit.add( executorService.submit(new ColGroupBlockStorage(dataHolders[dictionaryColumnCount]))); @@ -678,12 +692,12 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { } else { submit.add(executorService.submit( new BlockSortThread(i, noDictionaryColumnsData[++noDictionaryColumnCount], false, true, - true))); + true, isUseInvertedIndex[i]))); } } for (int k = 0; k < complexColCount; k++) { submit.add(executorService.submit(new BlockSortThread(i++, - colsAndValues.get(k).toArray(new byte[colsAndValues.get(k).size()][]), false))); + colsAndValues.get(k).toArray(new byte[colsAndValues.get(k).size()][]), false, true))); } executorService.shutdown(); try { @@ -1238,28 +1252,37 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { private byte[][] data; private boolean isSortRequired; private boolean isCompressionReq; + private boolean isUseInvertedIndex; private boolean isNoDictionary; - private BlockSortThread(int index, byte[][] data, boolean isSortRequired) { + private BlockSortThread(int index, byte[][] data, boolean isSortRequired, + boolean isUseInvertedIndex) { this.index = index; this.data = data; isCompressionReq = aggKeyBlock[this.index]; this.isSortRequired = isSortRequired; + this.isUseInvertedIndex = isUseInvertedIndex; } public BlockSortThread(int index, byte[][] data, boolean b, boolean isNoDictionary, - boolean isSortRequired) { + boolean isSortRequired, boolean isUseInvertedIndex) { this.index = index; this.data = data; isCompressionReq = b; this.isNoDictionary = isNoDictionary; this.isSortRequired = isSortRequired; + this.isUseInvertedIndex = isUseInvertedIndex; } @Override public IndexStorage call() throws Exception { - return new BlockIndexerStorageForInt(this.data, isCompressionReq, isNoDictionary, - isSortRequired); + if (isUseInvertedIndex) { + return new BlockIndexerStorageForInt(this.data, isCompressionReq, isNoDictionary, + isSortRequired); + } else { + return new BlockIndexerStorageForNoInvertedIndex(this.data, isCompressionReq, + isNoDictionary); + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4c67a7f4/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerModel.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerModel.java b/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerModel.java index 324d5fe..c856c06 100644 --- a/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerModel.java +++ b/processing/src/main/java/org/carbondata/processing/store/CarbonFactDataHandlerModel.java @@ -80,6 +80,10 @@ public class CarbonFactDataHandlerModel { */ private boolean isUpdateMemberRequest; /** + * flag to check whether use inverted index + */ + private boolean[] isUseInvertedIndex; + /** * dimension cardinality */ private int[] dimLens; @@ -348,6 +352,13 @@ public class CarbonFactDataHandlerModel { isCompactionFlow = compactionFlow; } + public boolean[] getIsUseInvertedIndex() { + return isUseInvertedIndex; + } + + public void setIsUseInvertedIndex(boolean[] isUseInvertedIndex) { + this.isUseInvertedIndex = isUseInvertedIndex; + } /** * * @return segmentProperties