carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject carbondata git commit: [CARBONDATA-2926] fixed ArrayIndexOutOfBoundException with varchar columns and empty sort_columns
Date Fri, 14 Sep 2018 14:08:15 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 2b7c8b374 -> f354e0b2b


[CARBONDATA-2926] fixed ArrayIndexOutOfBoundException with varchar columns and empty sort_columns

problem:
ArrayIndexOutOfBoundException if varchar column is present before dictionary columns along
with empty sort_columns.

root cause:
CarbonFactDataHandlerColumnar.isVarcharColumnFull() method uses model.getVarcharDimIdxInNoDict()
and index of varchar column in no dictonary array became negative.
currently index was calculated based on ordinal-number of dictionary columns. This can go
negative in no_sort column case,

solution:
take the varchar dimension index from no dictionary array from at runtime based on schema.

Also hotfix: removed number validation in table properties of sdk, now we support 7 properties

This closes #2705


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f354e0b2
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f354e0b2
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f354e0b2

Branch: refs/heads/master
Commit: f354e0b2b5704a3846b622f9256dc898022b9177
Parents: 2b7c8b3
Author: ajantha-bhat <ajanthabhat@gmail.com>
Authored: Mon Sep 10 23:08:15 2018 +0530
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Fri Sep 14 19:38:03 2018 +0530

----------------------------------------------------------------------
 .../carbondata/core/datastore/TableSpec.java    | 17 +++++++++++
 .../schema/table/TableSchemaBuilder.java        |  9 +++++-
 .../sdv/generated/SDKwriterTestCase.scala       | 31 ++++++++++++++++++++
 .../store/CarbonFactDataHandlerModel.java       | 10 ++++---
 .../carbondata/processing/store/TablePage.java  |  4 ++-
 .../sdk/file/CarbonWriterBuilder.java           |  6 ----
 6 files changed, 65 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java b/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
index 4d30cb0..bded430 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/TableSpec.java
@@ -20,6 +20,7 @@ package org.apache.carbondata.core.datastore;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.carbondata.core.metadata.datatype.DataType;
@@ -36,6 +37,14 @@ public class TableSpec {
   private DimensionSpec[] dimensionSpec;
   private MeasureSpec[] measureSpec;
 
+  // Many places we might have to access no-dictionary column spec.
+  // but no-dictionary column spec are not always in below order like,
+  // dictionary + no dictionary + complex + measure
+  // when sort_columns are empty, no columns are selected for sorting.
+  // so, spec will not be in above order.
+  // Hence noDictionaryDimensionSpec will be useful and it will be subset of dimensionSpec.
+  private List<DimensionSpec> noDictionaryDimensionSpec;
+
   // number of simple dimensions
   private int numSimpleDimensions;
 
@@ -56,6 +65,7 @@ public class TableSpec {
     }
     dimensionSpec = new DimensionSpec[dimensions.size()];
     measureSpec = new MeasureSpec[measures.size()];
+    noDictionaryDimensionSpec = new ArrayList<>();
     addDimensions(dimensions);
     addMeasures(measures);
   }
@@ -67,10 +77,12 @@ public class TableSpec {
       if (dimension.isComplex()) {
         DimensionSpec spec = new DimensionSpec(ColumnType.COMPLEX, dimension);
         dimensionSpec[dimIndex++] = spec;
+        noDictionaryDimensionSpec.add(spec);
       } else if (dimension.getDataType() == DataTypes.TIMESTAMP && !dimension
           .isDirectDictionaryEncoding()) {
         DimensionSpec spec = new DimensionSpec(ColumnType.PLAIN_VALUE, dimension);
         dimensionSpec[dimIndex++] = spec;
+        noDictionaryDimensionSpec.add(spec);
       } else if (dimension.isDirectDictionaryEncoding()) {
         DimensionSpec spec = new DimensionSpec(ColumnType.DIRECT_DICTIONARY, dimension);
         dimensionSpec[dimIndex++] = spec;
@@ -80,6 +92,7 @@ public class TableSpec {
       } else {
         DimensionSpec spec = new DimensionSpec(ColumnType.PLAIN_VALUE, dimension);
         dimensionSpec[dimIndex++] = spec;
+        noDictionaryDimensionSpec.add(spec);
       }
     }
   }
@@ -95,6 +108,10 @@ public class TableSpec {
     return dimensionSpec[dimensionIndex];
   }
 
+  public List<DimensionSpec> getNoDictionaryDimensionSpec() {
+    return noDictionaryDimensionSpec;
+  }
+
   public MeasureSpec getMeasureSpec(int measureIndex) {
     return measureSpec[measureIndex];
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index c8eaa16..f1be5ca 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -50,6 +50,8 @@ public class TableSchemaBuilder {
 
   private List<ColumnSchema> dimension = new LinkedList<>();
 
+  private List<ColumnSchema> varCharColumns = new LinkedList<>();
+
   private List<ColumnSchema> complex = new LinkedList<>();
 
   private List<ColumnSchema> measures = new LinkedList<>();
@@ -107,6 +109,7 @@ public class TableSchemaBuilder {
     schema.setSchemaEvolution(schemaEvol);
     List<ColumnSchema> allColumns = new LinkedList<>(sortColumns);
     allColumns.addAll(dimension);
+    allColumns.addAll(varCharColumns);
     allColumns.addAll(complex);
     allColumns.addAll(measures);
     schema.setListOfColumns(allColumns);
@@ -214,7 +217,11 @@ public class TableSchemaBuilder {
           || isComplexChild) {
         complex.add(newColumn);
       } else {
-        dimension.add(newColumn);
+        if (field.getDataType() == DataTypes.VARCHAR) {
+          varCharColumns.add(newColumn);
+        } else {
+          dimension.add(newColumn);
+        }
       }
     }
     if (newColumn.isDimensionColumn()) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
index 40a0a62..267a05c 100644
--- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
+++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SDKwriterTestCase.scala
@@ -756,6 +756,37 @@ class SDKwriterTestCase extends QueryTest with BeforeAndAfterEach {
     sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION '$writerPath'")
     checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5)))
   }
+
+  test("Test sdk with longstring with empty sort column and some direct dictionary columns")
{
+    // here we specify the longstring column as varchar
+    val schema = new StringBuilder()
+      .append("[ \n")
+      .append("   {\"address\":\"varchar\"},\n")
+      .append("   {\"date1\":\"date\"},\n")
+      .append("   {\"date2\":\"date\"},\n")
+      .append("   {\"age\":\"int\"}\n")
+      .append("]")
+      .toString()
+    val builder = CarbonWriter.builder()
+    val writer = builder
+      .outputPath(writerPath)
+      .sortBy(Array[String]())
+      .buildWriterForCSVInput(Schema.parseJson(schema))
+
+    for (i <- 0 until 5) {
+      writer
+        .write(Array[String](RandomStringUtils.randomAlphabetic(40000),
+          "1999-12-01",
+          "1998-12-01",
+          i.toString))
+    }
+    writer.close()
+
+    assert(FileFactory.getCarbonFile(writerPath).exists)
+    sql("DROP TABLE IF EXISTS sdkTable")
+    sql(s"CREATE EXTERNAL TABLE sdkTable STORED BY 'carbondata' LOCATION '$writerPath'")
+    checkAnswer(sql("select count(*) from sdkTable"), Seq(Row(5)))
+  }
 }
 
 object avroUtil{

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
index 54dd0aa..1a38de6 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerModel.java
@@ -224,14 +224,16 @@ public class CarbonFactDataHandlerModel {
       simpleDimsLen[i] = dimLens[i];
     }
 
+    int noDictionayDimensionIndex = 0;
     // for dynamic page size in write step if varchar columns exist
     List<Integer> varcharDimIdxInNoDict = new ArrayList<>();
     for (DataField dataField : configuration.getDataFields()) {
       CarbonColumn column = dataField.getColumn();
-      if (!column.isComplex() && !dataField.hasDictionaryEncoding() &&
-              column.getDataType() == DataTypes.VARCHAR) {
-        // ordinal is set in CarbonTable.fillDimensionsAndMeasuresForTables()
-        varcharDimIdxInNoDict.add(column.getOrdinal() - simpleDimsCount);
+      if (!dataField.hasDictionaryEncoding()) {
+        if (!column.isComplex() && column.getDataType() == DataTypes.VARCHAR) {
+          varcharDimIdxInNoDict.add(noDictionayDimensionIndex);
+        }
+        noDictionayDimensionIndex++;
       }
     }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
index 73746d6..a311483 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java
@@ -195,8 +195,10 @@ public class TablePage {
     if (noDictionaryCount > 0 || complexColumnCount > 0) {
       TableSpec tableSpec = model.getTableSpec();
       byte[][] noDictAndComplex = WriteStepRowUtil.getNoDictAndComplexDimension(row);
+      List<TableSpec.DimensionSpec> noDictionaryDimensionSpec =
+          tableSpec.getNoDictionaryDimensionSpec();
       for (int i = 0; i < noDictAndComplex.length; i++) {
-        if (tableSpec.getDimensionSpec(dictDimensionPages.length + i).getSchemaDataType()
+        if (noDictionaryDimensionSpec.get(i).getSchemaDataType()
             == DataTypes.VARCHAR) {
           byte[] valueWithLength = addIntLengthToByteArray(noDictAndComplex[i]);
           noDictDimensionPages[i].putData(rowId, valueWithLength);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f354e0b2/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index 5f3e7b8..388870f 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -294,12 +294,6 @@ public class CarbonWriterBuilder {
    */
   public CarbonWriterBuilder withTableProperties(Map<String, String> options) {
     Objects.requireNonNull(options, "Table properties should not be null");
-    //validate the options.
-    if (options.size() > 5) {
-      throw new IllegalArgumentException("Supports only 5 options now. "
-          + "Refer method header or documentation");
-    }
-
     Set<String> supportedOptions = new HashSet<>(Arrays
         .asList("table_blocksize", "table_blocklet_size", "local_dictionary_threshold",
             "local_dictionary_enable", "sort_columns", "sort_scope", "long_string_columns"));


Mime
View raw message