carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject [39/50] [abbrv] carbondata git commit: [CARBONDATA-1946] Exception thrown after alter data type change operation on dictionary exclude integer type column
Date Sun, 07 Jan 2018 03:05:47 GMT
[CARBONDATA-1946] Exception thrown after alter data type change operation on dictionary exclude
integer type column

Problem: After a change-data-type restructure operation (INT to BIGINT) on a dictionary-exclude
INT type column, a subsequent select query throws an exception.

Analysis: This happens because, while retrieving the data, the vector is created for the BIGINT
type (size 8 bytes), but the actual length of each stored value is 4 bytes, and the length
check performed while reading the data fails.

Solution: Added a new restructuredType variable in vector and assigned the block dimension
data type to it.

This closes #1732


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1f54c472
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1f54c472
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1f54c472

Branch: refs/heads/carbonstore
Commit: 1f54c47282bc201f2071bc8c9cc1be19baf0c9a1
Parents: 38038ad
Author: manishgupta88 <tomanishgupta18@gmail.com>
Authored: Wed Dec 27 23:09:58 2017 +0530
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Thu Jan 4 20:31:10 2018 +0530

----------------------------------------------------------------------
 ...feVariableLengthDimensionDataChunkStore.java |  6 +++--
 ...afeVariableLengthDimesionDataChunkStore.java |  6 +++--
 .../DictionaryBasedVectorResultCollector.java   |  4 ++++
 .../scan/executor/util/RestructureUtil.java     |  2 --
 .../scan/result/vector/CarbonColumnVector.java  | 14 ++++++++++++
 .../vector/impl/CarbonColumnVectorImpl.java     | 12 ++++++++++
 .../carbondata/core/util/DataTypeUtil.java      | 23 +++++++++++++++++++-
 .../presto/CarbonColumnVectorWrapper.java       | 12 ++++++++++
 .../vectorreader/ColumnarVectorWrapper.java     | 12 ++++++++++
 .../vectorreader/ChangeDataTypeTestCases.scala  | 15 +++++++++++++
 10 files changed, 99 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
index f0d18dc..db83198 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
@@ -24,7 +24,7 @@ import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
 import org.apache.carbondata.core.util.ByteUtil;
-
+import org.apache.carbondata.core.util.DataTypeUtil;
 
 /**
  * Below class is responsible to store variable length dimension data chunk in
@@ -153,7 +153,9 @@ public class SafeVariableLengthDimensionDataChunkStore extends SafeAbsractDimens
       } else if (dt == DataTypes.INT) {
         vector.putInt(vectorRow, ByteUtil.toInt(data, currentDataOffset, length));
       } else if (dt == DataTypes.LONG) {
-        vector.putLong(vectorRow, ByteUtil.toLong(data, currentDataOffset, length));
+        vector.putLong(vectorRow, DataTypeUtil
+            .getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(),
+                currentDataOffset, length));
       } else if (dt  == DataTypes.TIMESTAMP) {
         vector.putLong(vectorRow, ByteUtil.toLong(data, currentDataOffset, length) * 1000L);
       }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
index d6af052..36b2bd8 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
@@ -25,7 +25,7 @@ import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
 import org.apache.carbondata.core.util.ByteUtil;
-
+import org.apache.carbondata.core.util.DataTypeUtil;
 
 /**
  * Below class is responsible to store variable length dimension data chunk in
@@ -177,7 +177,9 @@ public class UnsafeVariableLengthDimesionDataChunkStore
       } else if (dt == DataTypes.INT) {
         vector.putInt(vectorRow, ByteUtil.toInt(value, 0, value.length));
       } else if (dt == DataTypes.LONG) {
-        vector.putLong(vectorRow, ByteUtil.toLong(value, 0, value.length));
+        vector.putLong(vectorRow, DataTypeUtil
+            .getDataBasedOnRestructuredDataType(value, vector.getBlockDataType(), 0,
+                value.length));
       } else if (dt == DataTypes.TIMESTAMP) {
         vector.putLong(vectorRow, ByteUtil.toLong(value, 0, value.length) * 1000L);
       }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
index 10888fe..5e6c99a 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
@@ -181,6 +181,10 @@ public class DictionaryBasedVectorResultCollector extends AbstractScannedResultC
       allColumnInfo[i].offset = rowCounter;
       allColumnInfo[i].vectorOffset = columnarBatch.getRowCounter();
       allColumnInfo[i].vector = columnarBatch.columnVectors[i];
+      if (null != allColumnInfo[i].dimension) {
+        allColumnInfo[i].vector
+            .setBlockDataType(allColumnInfo[i].dimension.getDimension().getDataType());
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
index b3a77b8..572400d 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/RestructureUtil.java
@@ -83,8 +83,6 @@ public class RestructureUtil {
           if (tableDimension.getColumnId().equals(queryDimension.getDimension().getColumnId()))
{
             QueryDimension currentBlockDimension = new QueryDimension(tableDimension.getColName());
             tableDimension.getColumnSchema()
-                .setDataType(queryDimension.getDimension().getDataType());
-            tableDimension.getColumnSchema()
                 .setPrecision(queryDimension.getDimension().getColumnSchema().getPrecision());
             tableDimension.getColumnSchema()
                 .setScale(queryDimension.getDimension().getColumnSchema().getScale());

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java
index 40a52e3..b606a50 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java
@@ -67,6 +67,20 @@ public interface CarbonColumnVector {
 
   DataType getType();
 
+  /**
+   * Method to be used for getting the restructured data type. This method will used for
+   * retrieving the data after change in data type restructure operation
+   *
+   * @return
+   */
+  DataType getBlockDataType();
+
+  /**
+   * Method to be used for setting the restructured data type. This method will used for
+   * retrieving the data after change in data type restructure operation
+   */
+  void setBlockDataType(DataType blockDataType);
+
   void setFilteredRowsExist(boolean filteredRowsExist);
 
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java
index 5f8233c..e431aaf 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java
@@ -51,6 +51,8 @@ public class CarbonColumnVectorImpl implements CarbonColumnVector {
 
   private DataType dataType;
 
+  private DataType blockDataType;
+
   /**
    * True if there is at least one NULL byte set. This is an optimization for the writer,
to skip
    * having to clear NULL bits.
@@ -235,6 +237,16 @@ public class CarbonColumnVectorImpl implements CarbonColumnVector {
     return dataType;
   }
 
+  @Override
+  public DataType getBlockDataType() {
+    return blockDataType;
+  }
+
+  @Override
+  public void setBlockDataType(DataType blockDataType) {
+    this.blockDataType = blockDataType;
+  }
+
   @Override public void setFilteredRowsExist(boolean filteredRowsExist) {
 
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
index 65ea63e..5d188b5 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
@@ -775,4 +775,25 @@ public final class DataTypeUtil {
     }
   }
 
-}
\ No newline at end of file
+  /**
+   * Method to type case the data based on modified data type. This method will used for
+   * retrieving the data after change in data type restructure operation
+   *
+   * @param data
+   * @param restructuredDataType
+   * @param currentDataOffset
+   * @param length
+   * @return
+   */
+  public static long getDataBasedOnRestructuredDataType(byte[] data, DataType restructuredDataType,
+      int currentDataOffset, int length) {
+    long value = 0L;
+    if (restructuredDataType == DataTypes.INT) {
+      value = ByteUtil.toInt(data, currentDataOffset, length);
+    } else if (restructuredDataType == DataTypes.LONG) {
+      value = ByteUtil.toLong(data, currentDataOffset, length);
+    }
+    return value;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java
b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java
index e19a598..78a1ea8 100644
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java
@@ -33,6 +33,8 @@ public class CarbonColumnVectorWrapper implements CarbonColumnVector {
 
   private boolean filteredRowsExist;
 
+  private DataType blockDataType;
+
   public CarbonColumnVectorWrapper(CarbonColumnVectorImpl columnVector, boolean[] filteredRows)
{
     this.columnVector = columnVector;
     this.filteredRows = filteredRows;
@@ -203,6 +205,16 @@ public class CarbonColumnVectorWrapper implements CarbonColumnVector
{
     return columnVector.getType();
   }
 
+  @Override
+  public DataType getBlockDataType() {
+    return blockDataType;
+  }
+
+  @Override
+  public void setBlockDataType(DataType blockDataType) {
+    this.blockDataType = blockDataType;
+  }
+
   @Override public void setFilteredRowsExist(boolean filteredRowsExist) {
     this.filteredRowsExist = filteredRowsExist;
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java
b/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java
index 9387276..7d42130 100644
--- a/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java
+++ b/integration/spark2/src/main/java/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java
@@ -38,6 +38,8 @@ class ColumnarVectorWrapper implements CarbonColumnVector {
 
   private DataType dataType;
 
+  private DataType blockDataType;
+
   public ColumnarVectorWrapper(ColumnVector columnVector, boolean[] filteredRows) {
     this.columnVector = columnVector;
     this.filteredRows = filteredRows;
@@ -211,6 +213,16 @@ class ColumnarVectorWrapper implements CarbonColumnVector {
     return dataType;
   }
 
+  @Override
+  public DataType getBlockDataType() {
+    return blockDataType;
+  }
+
+  @Override
+  public void setBlockDataType(DataType blockDataType) {
+    this.blockDataType = blockDataType;
+  }
+
   @Override public void setFilteredRowsExist(boolean filteredRowsExist) {
     this.filteredRowsExist = filteredRowsExist;
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1f54c472/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
index 16c0895..0124716 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
@@ -164,6 +164,21 @@ class ChangeDataTypeTestCases extends Spark2QueryTest with BeforeAndAfterAll
{
     sql("drop table if exists PreAggMain_preagg1")
   }
 
+  test("test data type change for dictionary exclude INT type column") {
+    sql("drop table if exists table_sort")
+    sql("CREATE TABLE table_sort (imei int,age int,mac string) STORED BY 'carbondata' TBLPROPERTIES('DICTIONARY_EXCLUDE'='imei,age','SORT_COLUMNS'='imei,age')")
+    sql("insert into table_sort select 32674,32794,'MAC1'")
+    sql("alter table table_sort change age age bigint")
+    sql("insert into table_sort select 32675,9223372036854775807,'MAC2'")
+    try {
+      sqlContext.setConf("carbon.enable.vector.reader", "true")
+      checkAnswer(sql("select * from table_sort"),
+        Seq(Row(32674, 32794, "MAC1"), Row(32675, Long.MaxValue, "MAC2")))
+    } finally {
+      sqlContext.setConf("carbon.enable.vector.reader", "true")
+    }
+  }
+
   override def afterAll {
     sql("DROP TABLE IF EXISTS changedatatypetest")
     sql("DROP TABLE IF EXISTS hivetable")


Mime
View raw message