carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject [21/47] incubator-carbondata git commit: [CARBONDATA-95] Columns values with numeric data types are not getting parsed when included in dictionary_include (#853)
Date Mon, 01 Aug 2016 10:05:19 GMT
[CARBONDATA-95] Columns values with numeric data types are not getting parsed when included
in dictionary_include (#853)

Problem: Columns values with numeric data types are not getting parsed when included in dictionary_include

Analysis: When a numeric data type — let's say Decimal — is defined for a column and the column
is included in dictionary_include, then whatever precision and scale is defined by the
user is not taken into consideration; every value is accepted and a dictionary entry is generated
for that value.

Solution: Parse the big decimal value while generating the global dictionary and during dictionary
lookup, and apply the precision and scale specified by the user.

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/4b6314cc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/4b6314cc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/4b6314cc

Branch: refs/heads/master
Commit: 4b6314cc337e5af8ecffc68ee1ffd3c32f9b754e
Parents: 577764b
Author: manishgupta88 <tomanishgupta18@gmail.com>
Authored: Mon Jul 25 15:45:57 2016 +0530
Committer: Venkata Ramana G <g.ramana.v1@gmail.com>
Committed: Mon Jul 25 15:45:57 2016 +0530

----------------------------------------------------------------------
 .../org/carbondata/core/util/DataTypeUtil.java  | 97 +++++++++++---------
 .../spark/tasks/DictionaryWriterTask.scala      | 48 +++++-----
 .../resources/decimalBoundaryDataCarbon.csv     | 22 ++---
 .../test/resources/decimalBoundaryDataHive.csv  | 22 ++---
 .../testsuite/bigdecimal/TestBigDecimal.scala   | 19 +++-
 .../processing/datatypes/PrimitiveDataType.java | 29 ++++--
 .../processing/mdkeygen/MDKeyGenStepMeta.java   |  3 +-
 .../CarbonCSVBasedDimSurrogateKeyGen.java       | 19 ++++
 .../csvbased/CarbonCSVBasedSeqGenMeta.java      |  3 +-
 .../csvbased/CarbonCSVBasedSeqGenStep.java      | 68 +++++++++++---
 .../processing/util/CarbonSchemaParser.java     |  1 +
 11 files changed, 220 insertions(+), 111 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/core/src/main/java/org/carbondata/core/util/DataTypeUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/carbondata/core/util/DataTypeUtil.java
index e6d6422..d0037e1 100644
--- a/core/src/main/java/org/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/carbondata/core/util/DataTypeUtil.java
@@ -3,15 +3,11 @@ package org.carbondata.core.util;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.math.RoundingMode;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
 
 import org.carbondata.core.carbon.metadata.datatype.DataType;
+import org.carbondata.core.carbon.metadata.schema.table.column.CarbonDimension;
 import org.carbondata.core.carbon.metadata.schema.table.column.CarbonMeasure;
 import org.carbondata.core.constants.CarbonCommonConstants;
-
-import org.apache.commons.lang.NumberUtils;
 public final class DataTypeUtil {
 
   private DataTypeUtil() {
@@ -154,76 +150,95 @@ public final class DataTypeUtil {
    * @param actualDataType actual data type
    * @return actual data after conversion
    */
-  public static Object getDataBasedOnDataType(String data, DataType actualDataType) {
+  public static Object normalizeIntAndLongValues(String data, DataType actualDataType) {
     if (null == data) {
       return null;
     }
     try {
+      Object parsedValue = null;
       switch (actualDataType) {
         case INT:
+          parsedValue = Integer.parseInt(data);
+          break;
         case LONG:
-        case DOUBLE:
-        case DECIMAL:
-          if (!NumberUtils.isNumber(data)) {
-            return null;
-          }
-          return data;
-        case TIMESTAMP:
-          if (data.isEmpty()) {
-            return null;
-          }
-          SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance()
-              .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
-                  CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
-          Date dateToStr = null;
-          try {
-            dateToStr = parser.parse(data);
-            return dateToStr.getTime();
-          } catch (ParseException e) {
-            return null;
-          }
+          parsedValue = Long.parseLong(data);
+          break;
         default:
           return data;
       }
+      if(null != parsedValue) {
+        return data;
+      }
+      return null;
     } catch (NumberFormatException ex) {
       return null;
     }
   }
 
   /**
-   * This method will parse a given string value corresponding to its datatype
+   * This method will parse a given string value corresponding to its data type
    *
-   * @param value    value to parse
-   * @param dataType datatype for that value
+   * @param value     value to parse
+   * @param dimension dimension to get data type and precision and scale in case of decimal
+   *                  data type
    * @return
    */
-  public static boolean validateColumnValueForItsDataType(String value, DataType dataType)
{
+  public static String normalizeColumnValueForItsDataType(String value, CarbonDimension dimension)
{
     try {
       Object parsedValue = null;
       // validation will not be done for timestamp datatype as for timestamp direct dictionary
       // is generated. No dictionary file is created for timestamp datatype column
-      switch (dataType) {
+      switch (dimension.getDataType()) {
         case DECIMAL:
-          parsedValue = new BigDecimal(value);
-          break;
+          return parseStringToBigDecimal(value, dimension);
         case INT:
-          parsedValue = Integer.parseInt(value);
-          break;
         case LONG:
-          parsedValue = Long.valueOf(value);
+          parsedValue = normalizeIntAndLongValues(value, dimension.getDataType());
           break;
         case DOUBLE:
-          parsedValue = Double.valueOf(value);
+          parsedValue = Double.parseDouble(value);
           break;
         default:
-          return true;
+          return value;
       }
       if (null != parsedValue) {
-        return true;
+        return value;
       }
-      return false;
+      return null;
     } catch (Exception e) {
-      return false;
+      return null;
+    }
+  }
+
+  /**
+   * This method will parse a value to its datatype if datatype is decimal else will return
+   * the value passed
+   *
+   * @param value     value to be parsed
+   * @param dimension
+   * @return
+   */
+  public static String parseValue(String value, CarbonDimension dimension) {
+    try {
+      switch (dimension.getDataType()) {
+        case DECIMAL:
+          return parseStringToBigDecimal(value, dimension);
+        default:
+          return value;
+      }
+    } catch (Exception e) {
+      return null;
+    }
+  }
+
+  private static String parseStringToBigDecimal(String value, CarbonDimension dimension)
{
+    BigDecimal bigDecimal = new BigDecimal(value)
+        .setScale(dimension.getColumnSchema().getScale(), RoundingMode.HALF_UP);
+    BigDecimal normalizedValue =
+        normalizeDecimalValue(bigDecimal, dimension.getColumnSchema().getPrecision());
+    if (null != normalizedValue) {
+      return normalizedValue.toString();
     }
+    return null;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
b/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
index aacf402..380e76b 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/tasks/DictionaryWriterTask.scala
@@ -65,24 +65,24 @@ class DictionaryWriterTask(valuesBuffer: mutable.HashSet[String],
         if (model.dictFileExists(columnIndex)) {
           if (dictionary.getSurrogateKey(values(0)) == CarbonCommonConstants
             .INVALID_SURROGATE_KEY) {
-            val parseSuccess = org.carbondata.core.util.DataTypeUtil
-              .validateColumnValueForItsDataType(values(0),
-                model.primDimensions(columnIndex).getDataType);
-            if (parseSuccess) {
-              writer.write(values(0))
-              distinctValues.add(values(0))
+            val parsedValue = org.carbondata.core.util.DataTypeUtil
+              .normalizeColumnValueForItsDataType(values(0),
+                model.primDimensions(columnIndex))
+            if (null != parsedValue) {
+              writer.write(parsedValue)
+              distinctValues.add(parsedValue)
             }
           }
           for (i <- 1 until values.length) {
             if (preValue != values(i)) {
               if (dictionary.getSurrogateKey(values(i)) ==
                   CarbonCommonConstants.INVALID_SURROGATE_KEY) {
-                val parseSuccess = org.carbondata.core.util.DataTypeUtil
-                  .validateColumnValueForItsDataType(values(i),
-                    model.primDimensions(columnIndex).getDataType);
-                if (parseSuccess) {
-                  writer.write(values(i))
-                  distinctValues.add(values(i))
+                val parsedValue = org.carbondata.core.util.DataTypeUtil
+                  .normalizeColumnValueForItsDataType(values(i),
+                    model.primDimensions(columnIndex))
+                if (null != parsedValue) {
+                  writer.write(parsedValue)
+                  distinctValues.add(parsedValue)
                   preValue = values(i)
                 }
               }
@@ -90,21 +90,21 @@ class DictionaryWriterTask(valuesBuffer: mutable.HashSet[String],
           }
 
         } else {
-          val parseSuccess = org.carbondata.core.util.DataTypeUtil
-            .validateColumnValueForItsDataType(values(0),
-              model.primDimensions(columnIndex).getDataType);
-          if (parseSuccess) {
-            writer.write(values(0))
-            distinctValues.add(values(0))
+          val parsedValue = org.carbondata.core.util.DataTypeUtil
+            .normalizeColumnValueForItsDataType(values(0),
+              model.primDimensions(columnIndex))
+          if (null != parsedValue) {
+            writer.write(parsedValue)
+            distinctValues.add(parsedValue)
           }
           for (i <- 1 until values.length) {
             if (preValue != values(i)) {
-              val parseSuccess = org.carbondata.core.util.DataTypeUtil
-                .validateColumnValueForItsDataType(values(i),
-                  model.primDimensions(columnIndex).getDataType);
-              if (parseSuccess) {
-                writer.write(values(i))
-                distinctValues.add(values(i))
+              val parsedValue = org.carbondata.core.util.DataTypeUtil
+                .normalizeColumnValueForItsDataType(values(i),
+                  model.primDimensions(columnIndex))
+              if (null != parsedValue) {
+                writer.write(parsedValue)
+                distinctValues.add(parsedValue)
                 preValue = values(i)
               }
             }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv b/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv
index c64a9bf..1744362 100644
--- a/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv
+++ b/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv
@@ -1,12 +1,12 @@
 ID,date,country,name,phonetype,serialname,salary
-1,2015/7/23,china,aaa1,phone197,ASD69643,12345678901234510.0000000000
-2,2015/7/24,china,aaa2,phone756,ASD42892,12345678901234520.0000000000
-3,2015/7/25,china,aaa3,phone1904,ASD37014,12345678901234530.0000000000
-4,2015/7/26,china,aaa4,phone2435,ASD66902,12345678901234560.0000000000
-5,2015/7/27,china,aaa5,phone2441,ASD90633,22345678901234560.0000000000
-6,2015/7/28,china,aaa6,phone294,ASD59961,32345678901234560.0000000000
-7,2015/7/29,china,aaa7,phone610,ASD14875,42345678901234560.0000000000
-8,2015/7/30,china,aaa8,phone1848,ASD57308,52345678901234560.0000000000
-9,2015/7/18,china,aaa9,phone706,ASD86717,62345678901234560.0000000000
-10,2015/7/19,usa,aaa10,phone685,ASD30505,72345678901234560.0000000000
-11,2015/7/18,china,aaa11,phone1554,ASD26101,82345678901234560.0000000000
+1,2015/7/23,china,aaa1,phone197,ASD69643,12345678901234510.1234567890123
+2,2015/7/24,china,aaa2,phone756,ASD42892,12345678901234520.1234567890123
+3,2015/7/25,china,aaa3,phone1904,ASD37014,12345678901234530.1234567890123
+4,2015/7/26,china,aaa4,phone2435,ASD66902,12345678901234560.1234567890123
+5,2015/7/27,china,aaa5,phone2441,ASD90633,22345678901234560.1234567890123
+6,2015/7/28,china,aaa6,phone294,ASD59961,32345678901234560.1234567890123
+7,2015/7/29,china,aaa7,phone610,ASD14875,42345678901234560.1234567890123
+8,2015/7/30,china,aaa8,phone1848,ASD57308,52345678901234560.1234567890123
+9,2015/7/18,china,aaa9,phone706,ASD86717,62345678901234560.1234567890123
+10,2015/7/19,usa,aaa10,phone685,ASD30505,72345678901234560.1234567890123
+11,2015/7/18,china,aaa11,phone1554,ASD26101,82345678901234560.1234567890123

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/integration/spark/src/test/resources/decimalBoundaryDataHive.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/decimalBoundaryDataHive.csv b/integration/spark/src/test/resources/decimalBoundaryDataHive.csv
index a2faaf1..2bcf7f5 100644
--- a/integration/spark/src/test/resources/decimalBoundaryDataHive.csv
+++ b/integration/spark/src/test/resources/decimalBoundaryDataHive.csv
@@ -1,11 +1,11 @@
-1,2015/7/23,china,aaa1,phone197,ASD69643,12345678901234510.0000000000
-2,2015/7/24,china,aaa2,phone756,ASD42892,12345678901234520.0000000000
-3,2015/7/25,china,aaa3,phone1904,ASD37014,12345678901234530.0000000000
-4,2015/7/26,china,aaa4,phone2435,ASD66902,12345678901234560.0000000000
-5,2015/7/27,china,aaa5,phone2441,ASD90633,22345678901234560.0000000000
-6,2015/7/28,china,aaa6,phone294,ASD59961,32345678901234560.0000000000
-7,2015/7/29,china,aaa7,phone610,ASD14875,42345678901234560.0000000000
-8,2015/7/30,china,aaa8,phone1848,ASD57308,52345678901234560.0000000000
-9,2015/7/18,china,aaa9,phone706,ASD86717,62345678901234560.0000000000
-10,2015/7/19,usa,aaa10,phone685,ASD30505,72345678901234560.0000000000
-11,2015/7/18,china,aaa11,phone1554,ASD26101,82345678901234560.0000000000
+1,2015/7/23,china,aaa1,phone197,ASD69643,12345678901234510.1234567890123
+2,2015/7/24,china,aaa2,phone756,ASD42892,12345678901234520.1234567890123
+3,2015/7/25,china,aaa3,phone1904,ASD37014,12345678901234530.1234567890123
+4,2015/7/26,china,aaa4,phone2435,ASD66902,12345678901234560.1234567890123
+5,2015/7/27,china,aaa5,phone2441,ASD90633,22345678901234560.1234567890123
+6,2015/7/28,china,aaa6,phone294,ASD59961,32345678901234560.1234567890123
+7,2015/7/29,china,aaa7,phone610,ASD14875,42345678901234560.1234567890123
+8,2015/7/30,china,aaa8,phone1848,ASD57308,52345678901234560.1234567890123
+9,2015/7/18,china,aaa9,phone706,ASD86717,62345678901234560.1234567890123
+10,2015/7/19,usa,aaa10,phone685,ASD30505,72345678901234560.1234567890123
+11,2015/7/18,china,aaa11,phone1554,ASD26101,82345678901234560.1234567890123

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
index 95dd1b4..b07705d 100644
--- a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
+++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
@@ -34,12 +34,15 @@ class TestBigDecimal extends QueryTest with BeforeAndAfterAll {
   override def beforeAll {
     sql("drop table if exists carbonTable")
     sql("drop table if exists hiveTable")
+    sql("drop table if exists hiveBigDecimal")
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
     sql("CREATE TABLE IF NOT EXISTS carbonTable (ID Int, date Timestamp, country String,
name String, phonetype String, serialname String, salary Decimal(17,2))STORED BY 'org.apache.carbondata.format'")
     sql("create table if not exists hiveTable(ID Int, date Timestamp, country String, name
String, phonetype String, serialname String, salary Decimal(17,2))row format delimited fields
terminated by ','")
     sql("LOAD DATA LOCAL INPATH './src/test/resources/decimalDataWithHeader.csv' into table
carbonTable")
     sql("LOAD DATA local inpath './src/test/resources/decimalDataWithoutHeader.csv' INTO
table hiveTable")
+    sql("create table if not exists hiveBigDecimal(ID Int, date Timestamp, country String,
name String, phonetype String, serialname String, salary decimal(27, 10))row format delimited
fields terminated by ','")
+    sql("LOAD DATA local inpath './src/test/resources/decimalBoundaryDataHive.csv' INTO table
hiveBigDecimal")
   }
 
   test("test detail query on big decimal column") {
@@ -111,11 +114,8 @@ class TestBigDecimal extends QueryTest with BeforeAndAfterAll {
 
   test("test aggregation on big decimal column with increased precision") {
     sql("drop table if exists carbonBigDecimal")
-    sql("drop table if exists hiveBigDecimal")
     sql("create table if not exists carbonBigDecimal (ID Int, date Timestamp, country String,
name String, phonetype String, serialname String, salary decimal(27, 10)) STORED BY 'org.apache.carbondata.format'")
-    sql("create table if not exists hiveBigDecimal(ID Int, date Timestamp, country String,
name String, phonetype String, serialname String, salary decimal(27, 10))row format delimited
fields terminated by ','")
     sql("LOAD DATA LOCAL INPATH './src/test/resources/decimalBoundaryDataCarbon.csv' into
table carbonBigDecimal")
-    sql("LOAD DATA local inpath './src/test/resources/decimalBoundaryDataHive.csv' INTO table
hiveBigDecimal")
 
     checkAnswer(sql("select sum(salary) from carbonBigDecimal"),
       sql("select sum(salary) from hiveBigDecimal"))
@@ -124,13 +124,24 @@ class TestBigDecimal extends QueryTest with BeforeAndAfterAll {
       sql("select sum(distinct salary) from hiveBigDecimal"))
 
     sql("drop table if exists carbonBigDecimal")
-    sql("drop table if exists hiveBigDecimal")
+  }
+
+  test("test big decimal for dictionary look up") {
+    sql("drop table if exists decimalDictLookUp")
+    sql("create table if not exists decimalDictLookUp (ID Int, date Timestamp, country String,
name String, phonetype String, serialname String, salary decimal(27, 10)) STORED BY 'org.apache.carbondata.format'
TBLPROPERTIES('dictionary_include'='salary')")
+    sql("LOAD DATA LOCAL INPATH './src/test/resources/decimalBoundaryDataCarbon.csv' into
table decimalDictLookUp")
+
+    checkAnswer(sql("select sum(salary) from decimalDictLookUp"),
+      sql("select sum(salary) from hiveBigDecimal"))
+
+    sql("drop table if exists decimalDictLookUp")
   }
   
 
   override def afterAll {
     sql("drop table if exists carbonTable")
     sql("drop table if exists hiveTable")
+    sql("drop table if exists hiveBigDecimal")
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
   }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
b/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
index e63d727..3d272d5 100644
--- a/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
+++ b/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
@@ -28,6 +28,7 @@ import java.util.List;
 import org.carbondata.core.constants.CarbonCommonConstants;
 import org.carbondata.core.keygenerator.KeyGenException;
 import org.carbondata.core.keygenerator.KeyGenerator;
+import org.carbondata.core.util.DataTypeUtil;
 import org.carbondata.processing.surrogatekeysgenerator.csvbased.CarbonCSVBasedDimSurrogateKeyGen;
 
 import org.pentaho.di.core.exception.KettleException;
@@ -58,6 +59,11 @@ public class PrimitiveDataType implements GenericDataType {
   private String columnId;
 
   /**
+   * dimension ordinal of primitive type column
+   */
+  private int dimensionOrdinal;
+
+  /**
    * key size
    */
   private int keySize;
@@ -74,14 +80,16 @@ public class PrimitiveDataType implements GenericDataType {
 
   /**
    * constructor
+   *
    * @param name
    * @param parentname
    * @param columnId
    */
-  public PrimitiveDataType(String name, String parentname, String columnId) {
+  public PrimitiveDataType(String name, String parentname, String columnId, int dimensionOrdinal)
{
     this.name = name;
     this.parentname = parentname;
     this.columnId = columnId;
+    this.dimensionOrdinal = dimensionOrdinal;
   }
 
   /*
@@ -151,15 +159,22 @@ public class PrimitiveDataType implements GenericDataType {
   /*
    * parse string and generate surrogate
    */
-  @Override
-  public void parseStringAndWriteByteArray(String tableName, String inputString,
+  @Override public void parseStringAndWriteByteArray(String tableName, String inputString,
       String[] delimiter, int delimiterIndex, DataOutputStream dataOutputStream,
       CarbonCSVBasedDimSurrogateKeyGen surrogateKeyGen) throws KettleException, IOException
{
-    Integer surrogateKey = surrogateKeyGen
-        .generateSurrogateKeys(inputString, tableName + CarbonCommonConstants.UNDERSCORE
+ name,
-            this.getColumnId());
-    if (surrogateKey == CarbonCommonConstants.INVALID_SURROGATE_KEY) {
+    String parsedValue = DataTypeUtil.parseValue(inputString,
+        surrogateKeyGen.getDimensionNameToDimensionMapping()
+            .get(tableName + CarbonCommonConstants.UNDERSCORE + name));
+    Integer surrogateKey = null;
+    if (null == parsedValue) {
       surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
+    } else {
+      surrogateKey = surrogateKeyGen
+          .generateSurrogateKeys(parsedValue, tableName + CarbonCommonConstants.UNDERSCORE
+ name,
+              this.getColumnId());
+      if (surrogateKey == CarbonCommonConstants.INVALID_SURROGATE_KEY) {
+        surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
+      }
     }
     dataOutputStream.writeInt(surrogateKey);
   }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java
b/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java
index 9b65e63..902de1e 100644
--- a/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java
+++ b/processing/src/main/java/org/carbondata/processing/mdkeygen/MDKeyGenStepMeta.java
@@ -496,7 +496,8 @@ public class MDKeyGenStepMeta extends BaseStepMeta implements StepMetaInterface
             g.addChildren(new StructDataType(levelInfo[0], levelInfo[2], levelInfo[3]));
             break;
           default:
-            g.addChildren(new PrimitiveDataType(levelInfo[0], levelInfo[2], levelInfo[3]));
+            g.addChildren(new PrimitiveDataType(levelInfo[0], levelInfo[2], levelInfo[3],
+                Integer.parseInt(levelInfo[4])));
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
index f9c3b6f..8fc1196 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
@@ -33,6 +33,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import org.carbondata.common.logging.LogService;
 import org.carbondata.common.logging.LogServiceFactory;
 import org.carbondata.core.cache.dictionary.Dictionary;
+import org.carbondata.core.carbon.metadata.schema.table.column.CarbonDimension;
 import org.carbondata.core.constants.CarbonCommonConstants;
 import org.carbondata.core.file.manager.composite.IFileManagerComposite;
 import org.carbondata.core.keygenerator.KeyGenException;
@@ -116,6 +117,10 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
       new HashMap<String, Map<ArrayWrapper, Integer>>(
           CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
   /**
+   * dimension name to dimension mapping
+   */
+  private Map<String, CarbonDimension> dimensionNameToDimensionMapping;
+  /**
    * rwLock2
    */
   private ReentrantReadWriteLock rwLock2 = new ReentrantReadWriteLock();
@@ -512,4 +517,18 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
     this.measureMaxSurroagetMap = measureMaxSurroagetMap;
   }
 
+  /**
+   * @return
+   */
+  public Map<String, CarbonDimension> getDimensionNameToDimensionMapping() {
+    return dimensionNameToDimensionMapping;
+  }
+
+  /**
+   * @param dimensionNameToDimensionMapping
+   */
+  public void setDimensionNameToDimensionMapping(
+      Map<String, CarbonDimension> dimensionNameToDimensionMapping) {
+    this.dimensionNameToDimensionMapping = dimensionNameToDimensionMapping;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java
b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java
index dad2bbc..e96be83 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenMeta.java
@@ -1026,7 +1026,8 @@ public class CarbonCSVBasedSeqGenMeta extends BaseStepMeta implements
StepMetaIn
             g.addChildren(new StructDataType(levelInfo[0], levelInfo[2], levelInfo[3]));
             break;
           default:
-            g.addChildren(new PrimitiveDataType(levelInfo[0], levelInfo[2], levelInfo[3]));
+            g.addChildren(new PrimitiveDataType(levelInfo[0], levelInfo[2], levelInfo[3],
+                Integer.parseInt(levelInfo[4])));
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
index 2978ad2..207e68d 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
@@ -48,6 +48,7 @@ import org.carbondata.core.cache.dictionary.Dictionary;
 import org.carbondata.core.carbon.metadata.CarbonMetadata;
 import org.carbondata.core.carbon.metadata.datatype.DataType;
 import org.carbondata.core.carbon.metadata.schema.table.CarbonTable;
+import org.carbondata.core.carbon.metadata.schema.table.column.CarbonDimension;
 import org.carbondata.core.carbon.metadata.schema.table.column.CarbonMeasure;
 import org.carbondata.core.constants.CarbonCommonConstants;
 import org.carbondata.core.file.manager.composite.FileData;
@@ -57,7 +58,10 @@ import org.carbondata.core.keygenerator.KeyGenerator;
 import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
 import org.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
 import org.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
-import org.carbondata.core.util.*;
+import org.carbondata.core.util.CarbonProperties;
+import org.carbondata.core.util.CarbonTimeStatisticsFactory;
+import org.carbondata.core.util.CarbonUtil;
+import org.carbondata.core.util.DataTypeUtil;
 import org.carbondata.core.writer.ByteArrayHolder;
 import org.carbondata.core.writer.HierarchyValueWriterForCSV;
 import org.carbondata.processing.dataprocessor.manager.CarbonDataProcessorManager;
@@ -374,7 +378,6 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
           surrogateKeyGen = new FileStoreSurrogateKeyGenForCSV(columnsInfo, meta.getPartitionID(),
               meta.getSegmentId(), meta.getTaskNo());
           data.setSurrogateKeyGen(surrogateKeyGen);
-
           updateStoreLocation();
 
           // Check the insert hierarchies required or not based on that
@@ -431,6 +434,8 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
         columnSchemaDetailsWrapper = meta.getColumnSchemaDetailsWrapper();
         if (null != getInputRowMeta()) {
           generateNoDictionaryAndComplexIndexMapping();
+          data.getSurrogateKeyGen()
+              .setDimensionNameToDimensionMapping(populateNameToCarbonDimensionMap());
         }
         serializationNullFormat = meta.getTableOptionWrapper().get("serialization_null_format");
       }
@@ -933,8 +938,9 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
       String columnName = metaColumnNames[j];
       String foreignKeyColumnName = foreignKeyMappingColumns[j];
       // check if it is ignore dictionary dimension or not . if yes directly write byte buffer
+      String tuple = (String) r[j];
       if (isNoDictionaryColumn[j]) {
-        processnoDictionaryDim(noDictionaryAndComplexIndexMapping[j], (String) r[j], dataTypes[j],
+        processnoDictionaryDim(noDictionaryAndComplexIndexMapping[j], tuple, dataTypes[j],
             isStringDataType[j], byteBufferArr);
         continue;
       }
@@ -1028,7 +1034,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
           int[] surrogateKeyForHierarchy = null;
           if (null != cache) {
 
-            Integer keyFromCsv = dicCache.getSurrogateKey(((String) r[j]));
+            Integer keyFromCsv = dicCache.getSurrogateKey(tuple);
 
             if (null != keyFromCsv) {
               surrogateKeyForHierarchy = cache.get(keyFromCsv);
@@ -1049,7 +1055,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
           } else {
             surrogateKeyForHierarchy = new int[1];
             surrogateKeyForHierarchy[0] =
-                surrogateKeyGen.generateSurrogateKeys((String) r[j], foreignKeyColumnName);
+                surrogateKeyGen.generateSurrogateKeys(tuple, foreignKeyColumnName);
           }
           for (int k = 0; k < surrogateKeyForHierarchy.length; k++) {
             if (dimPresentCsvOrder[i]) {
@@ -1069,7 +1075,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
               complexTypes[noDictionaryAndComplexIndexMapping[j] - meta.noDictionaryCols.length];
           ByteArrayOutputStream byteArray = new ByteArrayOutputStream();
           DataOutputStream dataOutputStream = new DataOutputStream(byteArray);
-          complexType.parseStringAndWriteByteArray(meta.getTableName(), (String) r[j],
+          complexType.parseStringAndWriteByteArray(meta.getTableName(), tuple,
               new String[] { meta.getComplexDelimiterLevel1(), meta.getComplexDelimiterLevel2()
},
               0, dataOutputStream, surrogateKeyGen);
           byteBufferArr[noDictionaryAndComplexIndexMapping[j]] =
@@ -1094,7 +1100,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
         Int2ObjectMap<int[]> cache = surrogateKeyGen.getHierCache().get(actualHierName);
         int[] surrogateKeyForHrrchy = null;
         if (null != cache) {
-          Integer keyFromCsv = dicCache.getSurrogateKey(((String) r[j]));
+          Integer keyFromCsv = dicCache.getSurrogateKey(tuple);
 
           if (null != keyFromCsv) {
             surrogateKeyForHrrchy = cache.get(keyFromCsv);
@@ -1137,12 +1143,20 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
                   DirectDictionaryKeyGeneratorFactory
                       .getDirectDictionaryGenerator(details.getColumnType());
               surrogateKeyForHrrchy[0] =
-                  directDictionaryGenerator1.generateDirectSurrogateKey(((String) r[j]));
+                  directDictionaryGenerator1.generateDirectSurrogateKey(tuple);
               surrogateKeyGen.max[m] = Integer.MAX_VALUE;
 
             } else {
-              surrogateKeyForHrrchy[0] =
-                  surrogateKeyGen.generateSurrogateKeys(((String) r[j]), foreignKeyColumnName);
+              String parsedValue = DataTypeUtil.parseValue(tuple,
+                  data.getSurrogateKeyGen().getDimensionNameToDimensionMapping().get(
+                      meta.getTableName() + CarbonCommonConstants.UNDERSCORE + columnName
+                          .toLowerCase()));
+              if(null == parsedValue) {
+                surrogateKeyForHrrchy[0] = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
+              } else {
+                surrogateKeyForHrrchy[0] =
+                    surrogateKeyGen.generateSurrogateKeys(parsedValue, foreignKeyColumnName);
+              }
             }
           }
           if (surrogateKeyForHrrchy[0] == CarbonCommonConstants.INVALID_SURROGATE_KEY) {
@@ -1763,7 +1777,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
       boolean isStringDataType, ByteBuffer[] out) {
     if (!(isStringDataType)) {
       if (null == DataTypeUtil
-          .getDataBasedOnDataType(dimensionValue, DataTypeUtil.getDataType(dataType))) {
+          .normalizeIntAndLongValues(dimensionValue, DataTypeUtil.getDataType(dataType)))
{
         dimensionValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL;
       }
     }
@@ -1824,5 +1838,37 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
     }
   }
 
+  private Map<String, CarbonDimension> populateNameToCarbonDimensionMap() {
+    CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
+        meta.getSchemaName() + CarbonCommonConstants.UNDERSCORE + meta.getTableName());
+    List<CarbonDimension> dimensionsList = carbonTable.getDimensionByTableName(meta.getTableName());
+    Map<String, CarbonDimension> dimensionNameToDimensionMapping =
+        new HashMap<>(dimensionsList.size());
+    for (CarbonDimension dimension : dimensionsList) {
+      if (dimension.isComplex()) {
+        populateComplexDimension(dimensionNameToDimensionMapping, dimension);
+      } else {
+        dimensionNameToDimensionMapping.put(
+            meta.getTableName() + CarbonCommonConstants.UNDERSCORE + dimension.getColName()
+                .toLowerCase(), dimension);
+      }
+    }
+    return dimensionNameToDimensionMapping;
+  }
+
+  private void populateComplexDimension(
+      Map<String, CarbonDimension> dimensionNameToDimensionMapping, CarbonDimension
dimension) {
+    List<CarbonDimension> listOfChildDimensions = dimension.getListOfChildDimensions();
+    for (CarbonDimension childDimension : listOfChildDimensions) {
+      if (childDimension.isComplex()) {
+        populateComplexDimension(dimensionNameToDimensionMapping, childDimension);
+      } else {
+        dimensionNameToDimensionMapping.put(
+            meta.getTableName() + CarbonCommonConstants.UNDERSCORE + childDimension.getColName(),
+            childDimension);
+      }
+    }
+  }
+
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4b6314cc/processing/src/main/java/org/carbondata/processing/util/CarbonSchemaParser.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/util/CarbonSchemaParser.java
b/processing/src/main/java/org/carbondata/processing/util/CarbonSchemaParser.java
index e0b1f1a..119e307 100644
--- a/processing/src/main/java/org/carbondata/processing/util/CarbonSchemaParser.java
+++ b/processing/src/main/java/org/carbondata/processing/util/CarbonSchemaParser.java
@@ -1179,6 +1179,7 @@ public final class CarbonSchemaParser {
             childDim.getColName() + CarbonCommonConstants.COLON_SPC_CHARACTER + childDim
                 .getDataType() + CarbonCommonConstants.COLON_SPC_CHARACTER + dimension.getColName()
                 + CarbonCommonConstants.COLON_SPC_CHARACTER + childDim.getColumnId()
+                + CarbonCommonConstants.COLON_SPC_CHARACTER + childDim.getOrdinal()
                 + CarbonCommonConstants.HASH_SPC_CHARACTER);
       }
     }



Mime
View raw message