carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From manishgupt...@apache.org
Subject carbondata git commit: [CARBONDATA-3134] fixed null values when cachelevel is set as blocklet
Date Wed, 28 Nov 2018 10:08:11 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 295734cc8 -> a5f080b67


[CARBONDATA-3134] fixed null values when cachelevel is set as blocklet

Problem:
For each blocklet an object of SegmentPropertiesAndSchemaHolder is created to store the schema
used for query. This object is created only if no other blocklet has the same schema. To check
the schema we compare List<ColumnSchema>. Because the equals method in ColumnSchema does not
check columnUniqueId, this check fails to tell the two schemas apart, and the new restructured
blocklet uses the schema of the old blocklet. Due to this the newly added column is
ignored, as the old blocklet schema specifies that the column is deleted (alter drop).

Solution:
Instead of checking equality through the existing equals and hashCode of ColumnSchema, write a
new implementation of both checks that also takes columnUniqueId into account.

This closes #2956


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a5f080b6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a5f080b6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a5f080b6

Branch: refs/heads/master
Commit: a5f080b6752a7a789463455edbbe4b888f6694e3
Parents: 295734c
Author: kunal642 <kunalkapoor642@gmail.com>
Authored: Tue Nov 27 14:13:27 2018 +0530
Committer: manishgupta88 <tomanishgupta18@gmail.com>
Committed: Wed Nov 28 15:43:24 2018 +0530

----------------------------------------------------------------------
 .../block/SegmentPropertiesAndSchemaHolder.java | 40 ++++++++++++++++++--
 .../schema/table/column/ColumnSchema.java       |  4 ++
 .../StandardPartitionTableQueryTestCase.scala   |  2 +-
 3 files changed, 42 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/a5f080b6/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
index 1b7e1f8..6f9a93d 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
@@ -18,6 +18,8 @@ package org.apache.carbondata.core.datastore.block;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -332,13 +334,45 @@ public class SegmentPropertiesAndSchemaHolder {
       }
       SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper other =
           (SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper) obj;
-      return tableIdentifier.equals(other.tableIdentifier) && columnsInTable
-          .equals(other.columnsInTable) && Arrays
+      return tableIdentifier.equals(other.tableIdentifier) && checkColumnSchemaEquality(
+          columnsInTable, other.columnsInTable) && Arrays
           .equals(columnCardinality, other.columnCardinality);
     }
 
+    private boolean checkColumnSchemaEquality(List<ColumnSchema> obj1, List<ColumnSchema>
obj2) {
+      if (obj1 == null || obj2 == null || (obj1.size() != obj2.size())) {
+        return false;
+      }
+      List<ColumnSchema> clonedObj1 = new ArrayList<>(obj1);
+      List<ColumnSchema> clonedObj2 = new ArrayList<>(obj2);
+      clonedObj1.addAll(obj1);
+      clonedObj2.addAll(obj2);
+      sortList(clonedObj1);
+      sortList(clonedObj2);
+      boolean exists = true;
+      for (int i = 0; i < obj1.size(); i++) {
+        if (!clonedObj1.get(i).equalsWithStrictCheck(clonedObj2.get(i))) {
+          exists = false;
+          break;
+        }
+      }
+      return exists;
+    }
+
+    private void sortList(List<ColumnSchema> columnSchemas) {
+      Collections.sort(columnSchemas, new Comparator<ColumnSchema>() {
+        @Override public int compare(ColumnSchema o1, ColumnSchema o2) {
+          return o1.getColumnUniqueId().compareTo(o2.getColumnUniqueId());
+        }
+      });
+    }
+
     @Override public int hashCode() {
-      return tableIdentifier.hashCode() + columnsInTable.hashCode() + Arrays
+      int allColumnsHashCode = 0;
+      for (ColumnSchema columnSchema: columnsInTable) {
+        allColumnsHashCode = allColumnsHashCode + columnSchema.strictHashCode();
+      }
+      return tableIdentifier.hashCode() + allColumnsHashCode + Arrays
           .hashCode(columnCardinality);
     }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/a5f080b6/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
index cf7ecab..0606cbd 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
@@ -297,6 +297,10 @@ public class ColumnSchema implements Serializable, Writable {
     return result;
   }
 
+  public int strictHashCode() {
+    return hashCode() + columnUniqueId.hashCode() + encodingList.hashCode();
+  }
+
   /**
    * Overridden equals method for columnSchema
    */

http://git-wip-us.apache.org/repos/asf/carbondata/blob/a5f080b6/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
index 8107cd5..c7957c1 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
@@ -440,7 +440,7 @@ test("Creation of partition table should fail if the colname in table
schema and
   test("validate data in partition table after dropping and adding a column") {
     sql("drop table if exists par")
     sql("create table par(name string) partitioned by (age double) stored by " +
-              "'carbondata'")
+              "'carbondata' TBLPROPERTIES('cache_level'='blocklet')")
     sql(s"load data local inpath '$resourcesPath/uniqwithoutheader.csv' into table par options"
+
         s"('header'='false')")
     sql("alter table par drop columns(name)")


Mime
View raw message