Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id C7ADB200C3C for ; Mon, 20 Mar 2017 02:26:39 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id C64CE160B8E; Mon, 20 Mar 2017 01:26:39 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id AA560160B7D for ; Mon, 20 Mar 2017 02:26:37 +0100 (CET) Received: (qmail 45595 invoked by uid 500); 20 Mar 2017 01:26:36 -0000 Mailing-List: contact commits-help@carbondata.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@carbondata.incubator.apache.org Delivered-To: mailing list commits@carbondata.incubator.apache.org Received: (qmail 45586 invoked by uid 99); 20 Mar 2017 01:26:36 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd3-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 20 Mar 2017 01:26:36 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd3-us-west.apache.org (ASF Mail Server at spamd3-us-west.apache.org) with ESMTP id 59BD2188004 for ; Mon, 20 Mar 2017 01:26:36 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd3-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -3.569 X-Spam-Level: X-Spam-Status: No, score=-3.569 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, RCVD_IN_DNSWL_HI=-5, RCVD_IN_MSPIKE_H3=-0.01, RCVD_IN_MSPIKE_WL=-0.01, RP_MATCHES_RCVD=-0.001, SPF_NEUTRAL=0.652] autolearn=disabled Received: from mx1-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd3-us-west.apache.org [10.40.0.10]) (amavisd-new, port 10024) with ESMTP id vgG2yoOXnSep for ; Mon, 20 Mar 2017 01:26:25 +0000 (UTC) Received: from mail.apache.org (hermes.apache.org 
[140.211.11.3]) by mx1-lw-eu.apache.org (ASF Mail Server at mx1-lw-eu.apache.org) with SMTP id 73F035FC29 for ; Mon, 20 Mar 2017 01:26:23 +0000 (UTC) Received: (qmail 45523 invoked by uid 99); 20 Mar 2017 01:26:22 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 20 Mar 2017 01:26:22 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 833B2DFF71; Mon, 20 Mar 2017 01:26:22 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jackylk@apache.org To: commits@carbondata.incubator.apache.org Date: Mon, 20 Mar 2017 01:26:22 -0000 Message-Id: <0b38ad6605954f27a9913a2447200528@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/3] incubator-carbondata git commit: Added batch sort to improve the loading performance archived-at: Mon, 20 Mar 2017 01:26:40 -0000 Repository: incubator-carbondata Updated Branches: refs/heads/master 0b44d0e3a -> 8d0a672b9 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/SortProcessorStepImpl.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/SortProcessorStepImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/SortProcessorStepImpl.java index aca47b6..17cc01e 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/SortProcessorStepImpl.java +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/SortProcessorStepImpl.java @@ -30,6 +30,7 @@ import org.apache.carbondata.processing.newflow.row.CarbonRowBatch; import org.apache.carbondata.processing.newflow.sort.Sorter; import org.apache.carbondata.processing.newflow.sort.impl.ParallelReadMergeSorterImpl; import 
org.apache.carbondata.processing.newflow.sort.impl.ParallelReadMergeSorterWithBucketingImpl; +import org.apache.carbondata.processing.newflow.sort.impl.UnsafeBatchParallelReadMergeSorterImpl; import org.apache.carbondata.processing.newflow.sort.impl.UnsafeParallelReadMergeSorterImpl; import org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters; @@ -58,7 +59,12 @@ public class SortProcessorStepImpl extends AbstractDataLoadProcessorStep { boolean offheapsort = Boolean.parseBoolean(CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.ENABLE_UNSAFE_SORT, CarbonCommonConstants.ENABLE_UNSAFE_SORT_DEFAULT)); - if (offheapsort) { + boolean batchSort = Boolean.parseBoolean(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.LOAD_USE_BATCH_SORT, + CarbonCommonConstants.LOAD_USE_BATCH_SORT_DEFAULT)); + if (batchSort) { + sorter = new UnsafeBatchParallelReadMergeSorterImpl(rowCounter); + } else if (offheapsort) { sorter = new UnsafeParallelReadMergeSorterImpl(rowCounter); } else { sorter = new ParallelReadMergeSorterImpl(rowCounter); @@ -74,7 +80,6 @@ public class SortProcessorStepImpl extends AbstractDataLoadProcessorStep { public Iterator[] execute() throws CarbonDataLoadingException { final Iterator[] iterators = child.execute(); Iterator[] sortedIterators = sorter.sort(iterators); - child.close(); return sortedIterators; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/IntermediateFileMerger.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/IntermediateFileMerger.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/IntermediateFileMerger.java index f063426..1b16675 100644 --- 
a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/IntermediateFileMerger.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/IntermediateFileMerger.java @@ -32,7 +32,7 @@ import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; public class IntermediateFileMerger implements Callable { /** @@ -334,30 +334,30 @@ public class IntermediateFileMerger implements Callable { char[] aggType = mergerParameters.getAggType(); for (int counter = 0; counter < mergerParameters.getDimColCount(); counter++) { - stream.writeInt((Integer) RemoveDictionaryUtil.getDimension(fieldIndex++, row)); + stream.writeInt((Integer) NonDictionaryUtil.getDimension(fieldIndex++, row)); } // added for high card also if ((mergerParameters.getNoDictionaryCount() + mergerParameters .getComplexDimColCount()) > 0) { - stream.write(RemoveDictionaryUtil.getByteArrayForNoDictionaryCols(row)); + stream.write(NonDictionaryUtil.getByteArrayForNoDictionaryCols(row)); } fieldIndex = 0; for (int counter = 0; counter < mergerParameters.getMeasureColCount(); counter++) { - if (null != RemoveDictionaryUtil.getMeasure(fieldIndex, row)) { + if (null != NonDictionaryUtil.getMeasure(fieldIndex, row)) { stream.write((byte) 1); if (aggType[counter] == CarbonCommonConstants.BYTE_VALUE_MEASURE) { - Double val = (Double) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Double val = (Double) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeDouble(val); } else if (aggType[counter] == CarbonCommonConstants.SUM_COUNT_VALUE_MEASURE) { - Double val = (Double) 
RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Double val = (Double) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeDouble(val); } else if (aggType[counter] == CarbonCommonConstants.BIG_INT_MEASURE) { - Long val = (Long) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Long val = (Long) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeLong(val); } else if (aggType[counter] == CarbonCommonConstants.BIG_DECIMAL_MEASURE) { - byte[] bigDecimalInBytes = (byte[]) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + byte[] bigDecimalInBytes = (byte[]) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeInt(bigDecimalInBytes.length); stream.write(bigDecimalInBytes); } @@ -413,19 +413,19 @@ public class IntermediateFileMerger implements Callable { int fieldIndex = 0; for (int counter = 0; counter < mergerParameters.getMeasureColCount(); counter++) { - if (null != RemoveDictionaryUtil.getMeasure(fieldIndex, row)) { + if (null != NonDictionaryUtil.getMeasure(fieldIndex, row)) { stream.write((byte) 1); if (aggType[counter] == CarbonCommonConstants.BYTE_VALUE_MEASURE) { - Double val = (Double) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Double val = (Double) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeDouble(val); } else if (aggType[counter] == CarbonCommonConstants.SUM_COUNT_VALUE_MEASURE) { - Double val = (Double) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Double val = (Double) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeDouble(val); } else if (aggType[counter] == CarbonCommonConstants.BIG_INT_MEASURE) { - Long val = (Long) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Long val = (Long) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeLong(val); } else if (aggType[counter] == CarbonCommonConstants.BIG_DECIMAL_MEASURE) { - byte[] bigDecimalInBytes = (byte[]) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + byte[] bigDecimalInBytes = (byte[]) 
NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeInt(bigDecimalInBytes.length); stream.write(bigDecimalInBytes); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparator.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparator.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparator.java index b69d2f1..c282f52 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparator.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparator.java @@ -22,7 +22,7 @@ import java.util.Comparator; import org.apache.carbondata.core.constants.IgnoreDictionary; import org.apache.carbondata.core.util.ByteUtil.UnsafeComparer; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; public class RowComparator implements Comparator { /** @@ -61,7 +61,7 @@ public class RowComparator implements Comparator { ByteBuffer buff1 = ByteBuffer.wrap(byteArr1); // extract a high card dims from complete byte[]. - RemoveDictionaryUtil + NonDictionaryUtil .extractSingleHighCardDims(byteArr1, noDictionaryindex, noDictionaryCount, buff1); byte[] byteArr2 = (byte[]) rowB[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()]; @@ -69,7 +69,7 @@ public class RowComparator implements Comparator { ByteBuffer buff2 = ByteBuffer.wrap(byteArr2); // extract a high card dims from complete byte[]. 
- RemoveDictionaryUtil + NonDictionaryUtil .extractSingleHighCardDims(byteArr2, noDictionaryindex, noDictionaryCount, buff2); int difference = UnsafeComparer.INSTANCE.compareTo(buff1, buff2); @@ -78,8 +78,8 @@ public class RowComparator implements Comparator { } noDictionaryindex++; } else { - int dimFieldA = RemoveDictionaryUtil.getDimension(normalIndex, rowA); - int dimFieldB = RemoveDictionaryUtil.getDimension(normalIndex, rowB); + int dimFieldA = NonDictionaryUtil.getDimension(normalIndex, rowA); + int dimFieldB = NonDictionaryUtil.getDimension(normalIndex, rowB); diff = dimFieldA - dimFieldB; if (diff != 0) { return diff; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparatorForNormalDims.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparatorForNormalDims.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparatorForNormalDims.java index f590441..ceaf5c6 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparatorForNormalDims.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/RowComparatorForNormalDims.java @@ -18,7 +18,7 @@ package org.apache.carbondata.processing.sortandgroupby.sortdata; import java.util.Comparator; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; /** * This class is used as comparator for comparing dims which are non high cardinality dims. 
@@ -49,8 +49,8 @@ public class RowComparatorForNormalDims implements Comparator { for (int i = 0; i < dimensionCount; i++) { - int dimFieldA = RemoveDictionaryUtil.getDimension(i, rowA); - int dimFieldB = RemoveDictionaryUtil.getDimension(i, rowB); + int dimFieldA = NonDictionaryUtil.getDimension(i, rowA); + int dimFieldB = NonDictionaryUtil.getDimension(i, rowB); diff = dimFieldA - dimFieldB; if (diff != 0) { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortDataRows.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortDataRows.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortDataRows.java index a29b426..794935d 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortDataRows.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortDataRows.java @@ -38,7 +38,7 @@ import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.core.util.DataTypeUtil; import org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException; import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; public class SortDataRows { /** @@ -279,28 +279,28 @@ public class SortDataRows { int fieldIndex = 0; for (int dimCount = 0; dimCount < dimColCount; dimCount++) { - stream.writeInt(RemoveDictionaryUtil.getDimension(fieldIndex++, row)); + stream.writeInt(NonDictionaryUtil.getDimension(fieldIndex++, row)); } // if any high cardinality dims are present then write it to the file. 
if (combinedDimCount > 0) { - stream.write(RemoveDictionaryUtil.getByteArrayForNoDictionaryCols(row)); + stream.write(NonDictionaryUtil.getByteArrayForNoDictionaryCols(row)); } // as measures are stored in separate array. fieldIndex = 0; for (int mesCount = 0; mesCount < parameters.getMeasureColCount(); mesCount++) { - if (null != RemoveDictionaryUtil.getMeasure(fieldIndex, row)) { + if (null != NonDictionaryUtil.getMeasure(fieldIndex, row)) { stream.write((byte) 1); if (aggType[mesCount] == CarbonCommonConstants.SUM_COUNT_VALUE_MEASURE) { - Double val = (Double) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Double val = (Double) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeDouble(val); } else if (aggType[mesCount] == CarbonCommonConstants.BIG_INT_MEASURE) { - Long val = (Long) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + Long val = (Long) NonDictionaryUtil.getMeasure(fieldIndex, row); stream.writeLong(val); } else if (aggType[mesCount] == CarbonCommonConstants.BIG_DECIMAL_MEASURE) { - BigDecimal val = (BigDecimal) RemoveDictionaryUtil.getMeasure(fieldIndex, row); + BigDecimal val = (BigDecimal) NonDictionaryUtil.getMeasure(fieldIndex, row); byte[] bigDecimalInBytes = DataTypeUtil.bigDecimalToByte(val); stream.writeInt(bigDecimalInBytes.length); stream.write(bigDecimalInBytes); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java index 416a445..fef8c9d 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java +++ 
b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/SortTempFileChunkHolder.java @@ -37,7 +37,7 @@ import org.apache.carbondata.core.util.ByteUtil.UnsafeComparer; import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; public class SortTempFileChunkHolder implements Comparable { @@ -359,7 +359,7 @@ public class SortTempFileChunkHolder implements Comparable 0) { - dataOutputStream.write(RemoveDictionaryUtil.getByteArrayForNoDictionaryCols(row)); + dataOutputStream.write(NonDictionaryUtil.getByteArrayForNoDictionaryCols(row)); } fieldIndex = 0; for (int counter = 0; counter < complexDimensionCount; counter++) { @@ -66,7 +66,7 @@ public class UnCompressedTempSortFileWriter extends AbstractTempSortFileWriter { for (int counter = 0; counter < measureCount; counter++) { if (null != row[fieldIndex]) { dataOutputStream.write((byte) 1); - dataOutputStream.writeDouble((Double) RemoveDictionaryUtil.getMeasure(fieldIndex, row)); + dataOutputStream.writeDouble((Double) NonDictionaryUtil.getMeasure(fieldIndex, row)); } else { dataOutputStream.write((byte) 0); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdatastep/SortKeyStep.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdatastep/SortKeyStep.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdatastep/SortKeyStep.java index a763a8d..03e8b25 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdatastep/SortKeyStep.java +++ 
b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdatastep/SortKeyStep.java @@ -28,7 +28,7 @@ import org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAn import org.apache.carbondata.processing.sortandgroupby.sortdata.SortDataRows; import org.apache.carbondata.processing.sortandgroupby.sortdata.SortIntermediateFileMerger; import org.apache.carbondata.processing.sortandgroupby.sortdata.SortParameters; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.row.RowMetaInterface; @@ -136,7 +136,7 @@ public class SortKeyStep extends BaseStep { } // check if all records are null than send empty row to next step - else if (RemoveDictionaryUtil.checkAllValuesForNull(row)) { + else if (NonDictionaryUtil.checkAllValuesForNull(row)) { // create empty row out size int outSize = Integer.parseInt(meta.getOutputRowSize()); @@ -171,10 +171,10 @@ public class SortKeyStep extends BaseStep { this.meta.getFields(data.getOutputRowMeta(), getStepname(), null, null, this); this.meta.setNoDictionaryCount( - RemoveDictionaryUtil.extractNoDictionaryCount(meta.getNoDictionaryDims())); + NonDictionaryUtil.extractNoDictionaryCount(meta.getNoDictionaryDims())); this.noDictionaryColMaping = - RemoveDictionaryUtil.convertStringToBooleanArr(meta.getNoDictionaryDimsMapping()); + NonDictionaryUtil.convertStringToBooleanArr(meta.getNoDictionaryDimsMapping()); SortParameters parameters = SortParameters.createSortParameters(meta.getDatabaseName(), meta.getTabelName(), meta.getDimensionCount(), meta.getComplexDimensionCount(), meta.getMeasureCount(), http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java ---------------------------------------------------------------------- diff 
--git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java index 74a1574..0eeba9d 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java @@ -76,7 +76,7 @@ import org.apache.carbondata.processing.store.writer.CarbonDataWriterVo; import org.apache.carbondata.processing.store.writer.CarbonFactDataWriter; import org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException; import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; import org.apache.spark.sql.types.Decimal; @@ -263,6 +263,9 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { private long schemaUpdatedTimeStamp; private String segmentId; + + private int taskExtension; + /** * current data format version */ @@ -288,6 +291,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { this.aggKeyBlock = new boolean[columnStoreCount]; this.isNoDictionary = new boolean[columnStoreCount]; this.bucketNumber = carbonFactDataHandlerModel.getBucketId(); + this.taskExtension = carbonFactDataHandlerModel.getTaskExtension(); this.isUseInvertedIndex = new boolean[columnStoreCount]; if (null != carbonFactDataHandlerModel.getIsUseInvertedIndex()) { for (int i = 0; i < isUseInvertedIndex.length; i++) { @@ -784,7 +788,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { noDictionaryColumnsData = new byte[noDictionaryCount][noDictionaryData.length][]; for (int i = 0; i < noDictionaryData.length; i++) { int complexColumnIndex = primitiveDimLens.length + noDictionaryCount; - byte[][] 
splitKey = RemoveDictionaryUtil + byte[][] splitKey = NonDictionaryUtil .splitNoDictionaryKey(noDictionaryData[i], noDictionaryCount + complexIndexMap.size()); int complexTypeIndex = 0; @@ -1013,9 +1017,9 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { byte[] composedNonDictEndKey = null; if (noDictionaryStartKey != null) { composedNonDictStartKey = - RemoveDictionaryUtil.packByteBufferIntoSingleByteArray(noDictionaryStartKey); + NonDictionaryUtil.packByteBufferIntoSingleByteArray(noDictionaryStartKey); composedNonDictEndKey = - RemoveDictionaryUtil.packByteBufferIntoSingleByteArray(noDictionaryEndKey); + NonDictionaryUtil.packByteBufferIntoSingleByteArray(noDictionaryEndKey); } return this.dataWriter .buildDataNodeHolder(blockStorage, dataHolderLocal, entryCountLocal, startkeyLocal, @@ -1430,6 +1434,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { carbonDataWriterVo.setSegmentProperties(segmentProperties); carbonDataWriterVo.setTableBlocksize(tableBlockSize); carbonDataWriterVo.setBucketNumber(bucketNumber); + carbonDataWriterVo.setTaskExtension(taskExtension); carbonDataWriterVo.setSchemaUpdatedTimeStamp(schemaUpdatedTimeStamp); return carbonDataWriterVo; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java index c67dc0e..59f2eb3 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java @@ -185,13 +185,16 @@ public class CarbonFactDataHandlerModel { */ private long 
schemaUpdatedTimeStamp; + private int taskExtension; + /** * Create the model using @{@link CarbonDataLoadConfiguration} * @param configuration * @return CarbonFactDataHandlerModel */ public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel( - CarbonDataLoadConfiguration configuration, String storeLocation, int bucketId) { + CarbonDataLoadConfiguration configuration, String storeLocation, int bucketId, + int taskExtension) { CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier(); @@ -200,8 +203,7 @@ public class CarbonFactDataHandlerModel { boolean[] isUseInvertedIndex = CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields()); - int[] dimLensWithComplex = - (int[]) configuration.getDataLoadProperty(DataLoadProcessorConstants.DIMENSION_LENGTHS); + int[] dimLensWithComplex = configuration.getCardinalityFinder().getCardinality(); List dimsLenList = new ArrayList(); for (int eachDimLen : dimLensWithComplex) { if (eachDimLen != 0) dimsLenList.add(eachDimLen); @@ -293,6 +295,7 @@ public class CarbonFactDataHandlerModel { } carbonFactDataHandlerModel.bucketId = bucketId; carbonFactDataHandlerModel.segmentId = configuration.getSegmentId(); + carbonFactDataHandlerModel.taskExtension = taskExtension; return carbonFactDataHandlerModel; } @@ -524,5 +527,9 @@ public class CarbonFactDataHandlerModel { public void setSegmentId(String segmentId) { this.segmentId = segmentId; } + + public int getTaskExtension() { + return taskExtension; + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java index 
ce5aa03..d206057 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java @@ -288,7 +288,7 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter< initFileCount(); String carbonDataFileName = carbonTablePath .getCarbonDataFileName(fileCount, dataWriterVo.getCarbonDataFileAttributes().getTaskId(), - dataWriterVo.getBucketNumber(), + dataWriterVo.getBucketNumber(), dataWriterVo.getTaskExtension(), dataWriterVo.getCarbonDataFileAttributes().getFactTimeStamp()); String actualFileNameVal = carbonDataFileName + CarbonCommonConstants.FILE_INPROGRESS_STATUS; FileData fileData = new FileData(actualFileNameVal, dataWriterVo.getStoreLocation()); @@ -340,7 +340,6 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter< /** * Below method will be used to fill the vlock info details * - * @param infoList info list * @param numberOfRows number of rows in file * @param filePath file path * @param currentPosition current offset @@ -440,7 +439,7 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter< List blockIndexThrift = CarbonMetadataUtil.getBlockIndexInfo(blockIndexInfoList); String fileName = dataWriterVo.getStoreLocation() + File.separator + carbonTablePath .getCarbonIndexFileName(dataWriterVo.getCarbonDataFileAttributes().getTaskId(), - dataWriterVo.getBucketNumber(), + dataWriterVo.getBucketNumber(), dataWriterVo.getTaskExtension(), dataWriterVo.getCarbonDataFileAttributes().getFactTimeStamp()); CarbonIndexFileWriter writer = new CarbonIndexFileWriter(); // open file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonDataWriterVo.java ---------------------------------------------------------------------- diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonDataWriterVo.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonDataWriterVo.java index 7ba794a..9ed0baa 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonDataWriterVo.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonDataWriterVo.java @@ -68,6 +68,8 @@ public class CarbonDataWriterVo { private long schemaUpdatedTimeStamp; + private int taskExtension; + /** * @return the storeLocation */ @@ -320,4 +322,12 @@ public class CarbonDataWriterVo { public void setSchemaUpdatedTimeStamp(long schemaUpdatedTimeStamp) { this.schemaUpdatedTimeStamp = schemaUpdatedTimeStamp; } + + public int getTaskExtension() { + return taskExtension; + } + + public void setTaskExtension(int taskExtension) { + this.taskExtension = taskExtension; + } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java index 5419858..2d85afa 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java +++ b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java @@ -36,7 +36,7 @@ import org.apache.carbondata.processing.schema.metadata.ColumnSchemaDetailsWrapp import org.apache.carbondata.processing.schema.metadata.HierarchiesInfo; import org.apache.carbondata.processing.schema.metadata.TableOptionWrapper; import 
org.apache.carbondata.processing.util.CarbonDataProcessorUtil; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; import org.pentaho.di.core.CheckResultInterface; import org.pentaho.di.core.Counter; @@ -670,7 +670,7 @@ public class CarbonCSVBasedSeqGenMeta extends BaseStepMeta implements StepMetaIn tableOptionWrapper.populateTableOptions(tableOption); updateDimensions(carbondim, carbonmsr, noDictionaryDims); - dimColDataTypes = RemoveDictionaryUtil.extractDimColsDataTypeValues(columnsDataTypeString); + dimColDataTypes = NonDictionaryUtil.extractDimColsDataTypeValues(columnsDataTypeString); if (null != complexTypeString) { complexTypes = getComplexTypesMap(complexTypeString); } else { @@ -1073,7 +1073,7 @@ public class CarbonCSVBasedSeqGenMeta extends BaseStepMeta implements StepMetaIn dimColNames = list.toArray(new String[list.size()]); // get high cardinality dimension Array - noDictionaryCols = RemoveDictionaryUtil.extractNoDictionaryDimsArr(noDictionaryDims); + noDictionaryCols = NonDictionaryUtil.extractNoDictionaryDimsArr(noDictionaryDims); String[] sm = null; if (null != msr) { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java index 4faeee9..f037cf0 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java +++ b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java @@ -72,7 +72,7 
@@ import org.apache.carbondata.processing.schema.metadata.ColumnSchemaDetailsWrapp import org.apache.carbondata.processing.schema.metadata.ColumnsInfo; import org.apache.carbondata.processing.schema.metadata.HierarchiesInfo; import org.apache.carbondata.processing.util.CarbonDataProcessorUtil; -import org.apache.carbondata.processing.util.RemoveDictionaryUtil; +import org.apache.carbondata.processing.util.NonDictionaryUtil; import static org.apache.carbondata.processing.constants.TableOptionConstant.BAD_RECORDS_ACTION; import static org.apache.carbondata.processing.constants.TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE; import static org.apache.carbondata.processing.constants.TableOptionConstant.SERIALIZATION_NULL_FORMAT; @@ -934,8 +934,8 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep { Object[] out = populateOutputRow(r); if (out != null) { for (int i = 0; i < meta.normLength - meta.complexTypes.size(); i++) { - if (null == RemoveDictionaryUtil.getDimension(i, out)) { - RemoveDictionaryUtil.setDimension(i, 1, out); + if (null == NonDictionaryUtil.getDimension(i, out)) { + NonDictionaryUtil.setDimension(i, 1, out); } } } @@ -1279,7 +1279,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep { } insertHierIfRequired(out); - RemoveDictionaryUtil + NonDictionaryUtil .prepareOut(newArray, byteBufferArr, out, dimLen - meta.complexTypes.size()); return newArray; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/util/NonDictionaryUtil.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/NonDictionaryUtil.java b/processing/src/main/java/org/apache/carbondata/processing/util/NonDictionaryUtil.java new file mode 100644 index 0000000..db3f9da --- /dev/null +++ b/processing/src/main/java/org/apache/carbondata/processing/util/NonDictionaryUtil.java @@ -0,0 +1,479 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * + */ +package org.apache.carbondata.processing.util; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.constants.IgnoreDictionary; + +/** + * This is the utility class for No Dictionary changes. + */ +public class NonDictionaryUtil { + /** + * Here we are dividing one single object [] into 3 arrays. one for + * dimensions , one for high card, one for measures. 
+ * + * @param out + * @param byteBufferArr + */ + public static void prepareOut(Object[] newOutArr, ByteBuffer[] byteBufferArr, Object[] out, + int dimCount) { + + byte[] nonDictionaryCols = + NonDictionaryUtil.packByteBufferIntoSingleByteArray(byteBufferArr); + Integer[] dimArray = new Integer[dimCount]; + for (int i = 0; i < dimCount; i++) { + dimArray[i] = (Integer) out[i]; + } + + Object[] measureArray = new Object[out.length - dimCount]; + int index = 0; + for (int j = dimCount; j < out.length; j++) { + measureArray[index++] = out[j]; + } + + newOutArr[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()] = dimArray; + newOutArr[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()] = nonDictionaryCols; + newOutArr[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()] = measureArray; + + } + + /** + * This method will form one single byte [] for all the high card dims. + * + * @param byteBufferArr + * @return + */ + public static byte[] packByteBufferIntoSingleByteArray(ByteBuffer[] byteBufferArr) { + // for empty array means there is no data to remove dictionary. + if (null == byteBufferArr || byteBufferArr.length == 0) { + return null; + } + int noOfCol = byteBufferArr.length; + short toDetermineLengthOfByteArr = 2; + short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); + int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; + + ByteBuffer buffer = ByteBuffer.allocate(totalBytes); + + // write the length of the byte [] as first short + buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); + // writing the offset of the first element. + buffer.putShort(offsetLen); + + // prepare index for byte [] + for (int index = 0; index < byteBufferArr.length - 1; index++) { + ByteBuffer individualCol = byteBufferArr[index]; + int noOfBytes = individualCol.capacity(); + + buffer.putShort((short) (offsetLen + noOfBytes)); + offsetLen += noOfBytes; + individualCol.rewind(); + } + + // put actual data. 
+ for (int index = 0; index < byteBufferArr.length; index++) { + ByteBuffer individualCol = byteBufferArr[index]; + buffer.put(individualCol.array()); + } + + buffer.rewind(); + return buffer.array(); + + } + + /** + * To calculate the total bytes in byte Buffer[]. + * + * @param byteBufferArr + * @return + */ + private static int calculateTotalBytes(ByteBuffer[] byteBufferArr) { + int total = 0; + for (int index = 0; index < byteBufferArr.length; index++) { + total += byteBufferArr[index].capacity(); + } + return total; + } + + /** + * This method will form one single byte [] for all the high card dims. + * For example if you need to pack 2 columns c1 and c2 , it stores in following way + * + * @param byteBufferArr + * @return + */ + public static byte[] packByteBufferIntoSingleByteArray(byte[][] byteBufferArr) { + // for empty array means there is no data to remove dictionary. + if (null == byteBufferArr || byteBufferArr.length == 0) { + return null; + } + int noOfCol = byteBufferArr.length; + short toDetermineLengthOfByteArr = 2; + short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); + int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; + + ByteBuffer buffer = ByteBuffer.allocate(totalBytes); + + // write the length of the byte [] as first short + buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); + // writing the offset of the first element. + buffer.putShort(offsetLen); + + // prepare index for byte [] + for (int index = 0; index < byteBufferArr.length - 1; index++) { + int noOfBytes = byteBufferArr[index].length; + + buffer.putShort((short) (offsetLen + noOfBytes)); + offsetLen += noOfBytes; + } + + // put actual data. + for (int index = 0; index < byteBufferArr.length; index++) { + buffer.put(byteBufferArr[index]); + } + buffer.rewind(); + return buffer.array(); + + } + + /** + * To calculate the total bytes in byte Buffer[]. 
+ * + * @param byteBufferArr + * @return + */ + private static int calculateTotalBytes(byte[][] byteBufferArr) { + int total = 0; + for (int index = 0; index < byteBufferArr.length; index++) { + total += byteBufferArr[index].length; + } + return total; + } + + /** + * Method to check whether entire row is empty or not. + * + * @param row + * @return + */ + public static boolean checkAllValuesForNull(Object[] row) { + if (checkNullForDims(row[0]) && checkNullForMeasures(row[2]) && null == row[1]) { + return true; + } + return false; + } + + /** + * To check whether the measures are empty/null + * + * @param object + * @return + */ + private static boolean checkNullForMeasures(Object object) { + Object[] measures = (Object[]) object; + for (Object measure : measures) { + if (null != measure) { + return false; + } + } + return true; + } + + /** + * To check whether the dimensions are empty/null + * + * @param object + * @return + */ + private static boolean checkNullForDims(Object object) { + Integer[] dimensions = (Integer[]) object; + for (Integer dimension : dimensions) { + if (null != dimension) { + return false; + } + } + return true; + } + + /** + * Method to get the required Dimension from obj [] + * + * @param index + * @param row + * @return + */ + public static Integer getDimension(int index, Object[] row) { + + Integer[] dimensions = (Integer[]) row[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()]; + + return dimensions[index]; + + } + + /** + * Method to get the required measure from obj [] + * + * @param index + * @param row + * @return + */ + public static Object getMeasure(int index, Object[] row) { + Object[] measures = (Object[]) row[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()]; + return measures[index]; + } + + public static byte[] getByteArrayForNoDictionaryCols(Object[] row) { + + return (byte[]) row[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()]; + } + + public static void prepareOutObj(Object[] out, Integer[] dimArray, byte[] 
byteBufferArr, + Object[] measureArray) { + + out[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()] = dimArray; + out[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()] = byteBufferArr; + out[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()] = measureArray; + + } + + public static void prepareOutObj(Object[] out, int[] dimArray, byte[][] byteBufferArr, + Object[] measureArray) { + + out[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()] = dimArray; + out[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()] = byteBufferArr; + out[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()] = measureArray; + + } + + /** + * This will extract the high cardinality count from the string. + */ + public static int extractNoDictionaryCount(String noDictionaryDim) { + return extractNoDictionaryDimsArr(noDictionaryDim).length; + } + + /** + * This method will split one single byte array of high card dims into array + * of byte arrays. + * + * @param NoDictionaryArr + * @param NoDictionaryCount + * @return + */ + public static byte[][] splitNoDictionaryKey(byte[] NoDictionaryArr, int NoDictionaryCount) { + byte[][] split = new byte[NoDictionaryCount][]; + + ByteBuffer buff = ByteBuffer.wrap(NoDictionaryArr, 2, NoDictionaryCount * 2); + + int remainingCol = NoDictionaryCount; + short secIndex = 0; + short firstIndex = 0; + for (int i = 0; i < NoDictionaryCount; i++) { + + if (remainingCol == 1) { + firstIndex = buff.getShort(); + int length = NoDictionaryArr.length - firstIndex; + + // add 2 bytes (short) as length required to determine size of + // each column value. + + split[i] = new byte[length + 2]; + ByteBuffer splittedCol = ByteBuffer.wrap(split[i]); + splittedCol.putShort((short) length); + + System.arraycopy(NoDictionaryArr, firstIndex, split[i], 2, length); + + } else { + + firstIndex = buff.getShort(); + secIndex = buff.getShort(); + int length = secIndex - firstIndex; + + // add 2 bytes (short) as length required to determine size of + // each column value. 
+ + split[i] = new byte[length + 2]; + ByteBuffer splittedCol = ByteBuffer.wrap(split[i]); + splittedCol.putShort((short) length); + + System.arraycopy(NoDictionaryArr, firstIndex, split[i], 2, length); + buff.position(buff.position() - 2); + + } + remainingCol--; + } + + return split; + } + + /** + * @param index + * @param val + */ + public static void setDimension(int index, int val, Object[] objArr) { + Integer[] dimensions = (Integer[]) objArr[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()]; + + dimensions[index] = val; + + } + + /** + * This will extract the high cardinality count from the string. + */ + public static String[] extractNoDictionaryDimsArr(String noDictionaryDim) { + + if (null == noDictionaryDim || noDictionaryDim.isEmpty()) { + return new String[0]; + } + + String[] NoDictionary = noDictionaryDim.split(CarbonCommonConstants.COMA_SPC_CHARACTER); + List<String> list1 = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN); + for (int i = 0; i < NoDictionary.length; i++) { + String[] dim = NoDictionary[i].split(CarbonCommonConstants.COLON_SPC_CHARACTER); + list1.add(dim[0]); + } + + return list1.toArray(new String[list1.size()]); + } + /** + * This will extract the high cardinality count from the string.
+ */ + public static Map<String, String> extractDimColsDataTypeValues(String colDataTypes) { + Map<String, String> mapOfColNameDataType = + new HashMap<String, String>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); + if (null == colDataTypes || colDataTypes.isEmpty()) { + return mapOfColNameDataType; + } + String[] colArray = colDataTypes.split(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER); + String[] colValueArray = null; + for (String colArrayVal : colArray) { + colValueArray = colArrayVal.split(CarbonCommonConstants.COMA_SPC_CHARACTER); + mapOfColNameDataType.put(colValueArray[0].toLowerCase(), colValueArray[1]); + + } + return mapOfColNameDataType; + } + + public static byte[] convertListByteArrToSingleArr(List<byte[]> noDictionaryValKeyList) { + ByteBuffer[] buffArr = new ByteBuffer[noDictionaryValKeyList.size()]; + int index = 0; + for (byte[] singleColVal : noDictionaryValKeyList) { + buffArr[index] = ByteBuffer.allocate(singleColVal.length); + buffArr[index].put(singleColVal); + buffArr[index++].rewind(); + } + + return NonDictionaryUtil.packByteBufferIntoSingleByteArray(buffArr); + + } + + /** + * This method will convert boolean [] to String with comma separated. + * This needs to be done as sort step meta only supports string types. + * + * @param noDictionaryDimsMapping boolean arr to represent which dims is no dictionary + * @return + */ + public static String convertBooleanArrToString(Boolean[] noDictionaryDimsMapping) { + StringBuilder builder = new StringBuilder(); + int index = 0; + for (; index < noDictionaryDimsMapping.length ; index++) { + builder.append(noDictionaryDimsMapping[index]); + builder.append(CarbonCommonConstants.COMA_SPC_CHARACTER); + } + int lastIndex = builder.lastIndexOf(CarbonCommonConstants.COMA_SPC_CHARACTER); + String str = -1 != lastIndex ? builder.substring(0, lastIndex) : builder.toString(); + return str; + } + + /** + * This will convert string to boolean[]. + * + * @param noDictionaryColMapping String representation of the boolean [].
+ * @return + */ + public static boolean[] convertStringToBooleanArr(String noDictionaryColMapping) { + + String[] splittedValue = null != noDictionaryColMapping ? + noDictionaryColMapping.split(CarbonCommonConstants.COMA_SPC_CHARACTER) : + new String[0]; + + // convert string[] to boolean [] + + boolean[] noDictionaryMapping = new boolean[splittedValue.length]; + int index = 0; + for (String str : splittedValue) { + noDictionaryMapping[index++] = Boolean.parseBoolean(str); + } + + return noDictionaryMapping; + } + + /** + * This method will extract the single dimension from the complete high card dims byte[].+ * + * The format of the byte [] will be, Totallength,CompleteStartOffsets,Dat + * + * @param highCardArr + * @param index + * @param highCardinalityCount + * @param outBuffer + */ + public static void extractSingleHighCardDims(byte[] highCardArr, int index, + int highCardinalityCount, ByteBuffer outBuffer) { + ByteBuffer buff = null; + short secIndex = 0; + short firstIndex = 0; + int length; + // if the requested index is a last one then we need to calculate length + // based on byte[] length. + if (index == highCardinalityCount - 1) { + // need to read 2 bytes(1 short) to determine starting offset and + // length can be calculated by array length. + buff = ByteBuffer.wrap(highCardArr, (index * 2) + 2, 2); + } else { + // need to read 4 bytes(2 short) to determine starting offset and + // length. 
+ buff = ByteBuffer.wrap(highCardArr, (index * 2) + 2, 4); + } + + firstIndex = buff.getShort(); + // if it is a last dimension in high card then this will be last + // offset.so calculate length from total length + if (index == highCardinalityCount - 1) { + secIndex = (short) highCardArr.length; + } else { + secIndex = buff.getShort(); + } + + length = secIndex - firstIndex; + + outBuffer.position(firstIndex); + outBuffer.limit(outBuffer.position() + length); + + } +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b13ead9c/processing/src/main/java/org/apache/carbondata/processing/util/RemoveDictionaryUtil.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/RemoveDictionaryUtil.java b/processing/src/main/java/org/apache/carbondata/processing/util/RemoveDictionaryUtil.java deleted file mode 100644 index d71d12f..0000000 --- a/processing/src/main/java/org/apache/carbondata/processing/util/RemoveDictionaryUtil.java +++ /dev/null @@ -1,479 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * - */ -package org.apache.carbondata.processing.util; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.constants.IgnoreDictionary; - -/** - * This is the utility class for No Dictionary changes. - */ -public class RemoveDictionaryUtil { - /** - * Here we are dividing one single object [] into 3 arrays. one for - * dimensions , one for high card, one for measures. - * - * @param out - * @param byteBufferArr - */ - public static void prepareOut(Object[] newOutArr, ByteBuffer[] byteBufferArr, Object[] out, - int dimCount) { - - byte[] nonDictionaryCols = - RemoveDictionaryUtil.packByteBufferIntoSingleByteArray(byteBufferArr); - Integer[] dimArray = new Integer[dimCount]; - for (int i = 0; i < dimCount; i++) { - dimArray[i] = (Integer) out[i]; - } - - Object[] measureArray = new Object[out.length - dimCount]; - int index = 0; - for (int j = dimCount; j < out.length; j++) { - measureArray[index++] = out[j]; - } - - newOutArr[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()] = dimArray; - newOutArr[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()] = nonDictionaryCols; - newOutArr[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()] = measureArray; - - } - - /** - * This method will form one single byte [] for all the high card dims. - * - * @param byteBufferArr - * @return - */ - public static byte[] packByteBufferIntoSingleByteArray(ByteBuffer[] byteBufferArr) { - // for empty array means there is no data to remove dictionary. 
- if (null == byteBufferArr || byteBufferArr.length == 0) { - return null; - } - int noOfCol = byteBufferArr.length; - short toDetermineLengthOfByteArr = 2; - short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); - int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; - - ByteBuffer buffer = ByteBuffer.allocate(totalBytes); - - // write the length of the byte [] as first short - buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); - // writing the offset of the first element. - buffer.putShort(offsetLen); - - // prepare index for byte [] - for (int index = 0; index < byteBufferArr.length - 1; index++) { - ByteBuffer individualCol = byteBufferArr[index]; - int noOfBytes = individualCol.capacity(); - - buffer.putShort((short) (offsetLen + noOfBytes)); - offsetLen += noOfBytes; - individualCol.rewind(); - } - - // put actual data. - for (int index = 0; index < byteBufferArr.length; index++) { - ByteBuffer individualCol = byteBufferArr[index]; - buffer.put(individualCol.array()); - } - - buffer.rewind(); - return buffer.array(); - - } - - /** - * To calculate the total bytes in byte Buffer[]. - * - * @param byteBufferArr - * @return - */ - private static int calculateTotalBytes(ByteBuffer[] byteBufferArr) { - int total = 0; - for (int index = 0; index < byteBufferArr.length; index++) { - total += byteBufferArr[index].capacity(); - } - return total; - } - - /** - * This method will form one single byte [] for all the high card dims. - * For example if you need to pack 2 columns c1 and c2 , it stores in following way - * - * @param byteBufferArr - * @return - */ - public static byte[] packByteBufferIntoSingleByteArray(byte[][] byteBufferArr) { - // for empty array means there is no data to remove dictionary. 
- if (null == byteBufferArr || byteBufferArr.length == 0) { - return null; - } - int noOfCol = byteBufferArr.length; - short toDetermineLengthOfByteArr = 2; - short offsetLen = (short) (noOfCol * 2 + toDetermineLengthOfByteArr); - int totalBytes = calculateTotalBytes(byteBufferArr) + offsetLen; - - ByteBuffer buffer = ByteBuffer.allocate(totalBytes); - - // write the length of the byte [] as first short - buffer.putShort((short) (totalBytes - toDetermineLengthOfByteArr)); - // writing the offset of the first element. - buffer.putShort(offsetLen); - - // prepare index for byte [] - for (int index = 0; index < byteBufferArr.length - 1; index++) { - int noOfBytes = byteBufferArr[index].length; - - buffer.putShort((short) (offsetLen + noOfBytes)); - offsetLen += noOfBytes; - } - - // put actual data. - for (int index = 0; index < byteBufferArr.length; index++) { - buffer.put(byteBufferArr[index]); - } - buffer.rewind(); - return buffer.array(); - - } - - /** - * To calculate the total bytes in byte Buffer[]. - * - * @param byteBufferArr - * @return - */ - private static int calculateTotalBytes(byte[][] byteBufferArr) { - int total = 0; - for (int index = 0; index < byteBufferArr.length; index++) { - total += byteBufferArr[index].length; - } - return total; - } - - /** - * Method to check whether entire row is empty or not. 
- * - * @param row - * @return - */ - public static boolean checkAllValuesForNull(Object[] row) { - if (checkNullForDims(row[0]) && checkNullForMeasures(row[2]) && null == row[1]) { - return true; - } - return false; - } - - /** - * To check whether the measures are empty/null - * - * @param object - * @return - */ - private static boolean checkNullForMeasures(Object object) { - Object[] measures = (Object[]) object; - for (Object measure : measures) { - if (null != measure) { - return false; - } - } - return true; - } - - /** - * To check whether the dimensions are empty/null - * - * @param object - * @return - */ - private static boolean checkNullForDims(Object object) { - Integer[] dimensions = (Integer[]) object; - for (Integer dimension : dimensions) { - if (null != dimension) { - return false; - } - } - return true; - } - - /** - * Method to get the required Dimension from obj [] - * - * @param index - * @param row - * @return - */ - public static Integer getDimension(int index, Object[] row) { - - Integer[] dimensions = (Integer[]) row[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()]; - - return dimensions[index]; - - } - - /** - * Method to get the required measure from obj [] - * - * @param index - * @param row - * @return - */ - public static Object getMeasure(int index, Object[] row) { - Object[] measures = (Object[]) row[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()]; - return measures[index]; - } - - public static byte[] getByteArrayForNoDictionaryCols(Object[] row) { - - return (byte[]) row[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()]; - } - - public static void prepareOutObj(Object[] out, Integer[] dimArray, byte[] byteBufferArr, - Object[] measureArray) { - - out[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()] = dimArray; - out[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()] = byteBufferArr; - out[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()] = measureArray; - - } - - public static void prepareOutObj(Object[] out, int[] 
dimArray, byte[][] byteBufferArr, - Object[] measureArray) { - - out[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()] = dimArray; - out[IgnoreDictionary.BYTE_ARRAY_INDEX_IN_ROW.getIndex()] = byteBufferArr; - out[IgnoreDictionary.MEASURES_INDEX_IN_ROW.getIndex()] = measureArray; - - } - - /** - * This will extract the high cardinality count from the string. - */ - public static int extractNoDictionaryCount(String noDictionaryDim) { - return extractNoDictionaryDimsArr(noDictionaryDim).length; - } - - /** - * This method will split one single byte array of high card dims into array - * of byte arrays. - * - * @param NoDictionaryArr - * @param NoDictionaryCount - * @return - */ - public static byte[][] splitNoDictionaryKey(byte[] NoDictionaryArr, int NoDictionaryCount) { - byte[][] split = new byte[NoDictionaryCount][]; - - ByteBuffer buff = ByteBuffer.wrap(NoDictionaryArr, 2, NoDictionaryCount * 2); - - int remainingCol = NoDictionaryCount; - short secIndex = 0; - short firstIndex = 0; - for (int i = 0; i < NoDictionaryCount; i++) { - - if (remainingCol == 1) { - firstIndex = buff.getShort(); - int length = NoDictionaryArr.length - firstIndex; - - // add 2 bytes (short) as length required to determine size of - // each column value. - - split[i] = new byte[length + 2]; - ByteBuffer splittedCol = ByteBuffer.wrap(split[i]); - splittedCol.putShort((short) length); - - System.arraycopy(NoDictionaryArr, firstIndex, split[i], 2, length); - - } else { - - firstIndex = buff.getShort(); - secIndex = buff.getShort(); - int length = secIndex - firstIndex; - - // add 2 bytes (short) as length required to determine size of - // each column value. 
- - split[i] = new byte[length + 2]; - ByteBuffer splittedCol = ByteBuffer.wrap(split[i]); - splittedCol.putShort((short) length); - - System.arraycopy(NoDictionaryArr, firstIndex, split[i], 2, length); - buff.position(buff.position() - 2); - - } - remainingCol--; - } - - return split; - } - - /** - * @param index - * @param val - */ - public static void setDimension(int index, int val, Object[] objArr) { - Integer[] dimensions = (Integer[]) objArr[IgnoreDictionary.DIMENSION_INDEX_IN_ROW.getIndex()]; - - dimensions[index] = val; - - } - - /** - * This will extract the high cardinality count from the string. - */ - public static String[] extractNoDictionaryDimsArr(String noDictionaryDim) { - - if (null == noDictionaryDim || noDictionaryDim.isEmpty()) { - return new String[0]; - } - - String[] NoDictionary = noDictionaryDim.split(CarbonCommonConstants.COMA_SPC_CHARACTER); - List<String> list1 = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN); - for (int i = 0; i < NoDictionary.length; i++) { - String[] dim = NoDictionary[i].split(CarbonCommonConstants.COLON_SPC_CHARACTER); - list1.add(dim[0]); - } - - return list1.toArray(new String[list1.size()]); - } - /** - * This will extract the high cardinality count from the string.
- */ - public static Map<String, String> extractDimColsDataTypeValues(String colDataTypes) { - Map<String, String> mapOfColNameDataType = - new HashMap<String, String>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); - if (null == colDataTypes || colDataTypes.isEmpty()) { - return mapOfColNameDataType; - } - String[] colArray = colDataTypes.split(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER); - String[] colValueArray = null; - for (String colArrayVal : colArray) { - colValueArray = colArrayVal.split(CarbonCommonConstants.COMA_SPC_CHARACTER); - mapOfColNameDataType.put(colValueArray[0].toLowerCase(), colValueArray[1]); - - } - return mapOfColNameDataType; - } - - public static byte[] convertListByteArrToSingleArr(List<byte[]> noDictionaryValKeyList) { - ByteBuffer[] buffArr = new ByteBuffer[noDictionaryValKeyList.size()]; - int index = 0; - for (byte[] singleColVal : noDictionaryValKeyList) { - buffArr[index] = ByteBuffer.allocate(singleColVal.length); - buffArr[index].put(singleColVal); - buffArr[index++].rewind(); - } - - return RemoveDictionaryUtil.packByteBufferIntoSingleByteArray(buffArr); - - } - - /** - * This method will convert boolean [] to String with comma separated. - * This needs to be done as sort step meta only supports string types. - * - * @param noDictionaryDimsMapping boolean arr to represent which dims is no dictionary - * @return - */ - public static String convertBooleanArrToString(Boolean[] noDictionaryDimsMapping) { - StringBuilder builder = new StringBuilder(); - int index = 0; - for (; index < noDictionaryDimsMapping.length ; index++) { - builder.append(noDictionaryDimsMapping[index]); - builder.append(CarbonCommonConstants.COMA_SPC_CHARACTER); - } - int lastIndex = builder.lastIndexOf(CarbonCommonConstants.COMA_SPC_CHARACTER); - String str = -1 != lastIndex ? builder.substring(0, lastIndex) : builder.toString(); - return str; - } - - /** - * This will convert string to boolean[]. - * - * @param noDictionaryColMapping String representation of the boolean [].
- * @return - */ - public static boolean[] convertStringToBooleanArr(String noDictionaryColMapping) { - - String[] splittedValue = null != noDictionaryColMapping ? - noDictionaryColMapping.split(CarbonCommonConstants.COMA_SPC_CHARACTER) : - new String[0]; - - // convert string[] to boolean [] - - boolean[] noDictionaryMapping = new boolean[splittedValue.length]; - int index = 0; - for (String str : splittedValue) { - noDictionaryMapping[index++] = Boolean.parseBoolean(str); - } - - return noDictionaryMapping; - } - - /** - * This method will extract the single dimension from the complete high card dims byte[].+ * - * The format of the byte [] will be, Totallength,CompleteStartOffsets,Dat - * - * @param highCardArr - * @param index - * @param highCardinalityCount - * @param outBuffer - */ - public static void extractSingleHighCardDims(byte[] highCardArr, int index, - int highCardinalityCount, ByteBuffer outBuffer) { - ByteBuffer buff = null; - short secIndex = 0; - short firstIndex = 0; - int length; - // if the requested index is a last one then we need to calculate length - // based on byte[] length. - if (index == highCardinalityCount - 1) { - // need to read 2 bytes(1 short) to determine starting offset and - // length can be calculated by array length. - buff = ByteBuffer.wrap(highCardArr, (index * 2) + 2, 2); - } else { - // need to read 4 bytes(2 short) to determine starting offset and - // length. - buff = ByteBuffer.wrap(highCardArr, (index * 2) + 2, 4); - } - - firstIndex = buff.getShort(); - // if it is a last dimension in high card then this will be last - // offset.so calculate length from total length - if (index == highCardinalityCount - 1) { - secIndex = (short) highCardArr.length; - } else { - secIndex = buff.getShort(); - } - - length = secIndex - firstIndex; - - outBuffer.position(firstIndex); - outBuffer.limit(outBuffer.position() + length); - - } -}