carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gvram...@apache.org
Subject [1/2] incubator-carbondata git commit: [CARBONDATA-794] Numeric dimension column value should be validated for the bad record
Date Mon, 20 Mar 2017 15:03:55 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master d6d5eca30 -> a1b8afaff


[CARBONDATA-794] Numeric dimension column value should be validated for the bad record


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/3bc9152e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/3bc9152e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/3bc9152e

Branch: refs/heads/master
Commit: 3bc9152e51b8771c68586c4e5e638be1c8acbd81
Parents: d6d5eca
Author: mohammadshahidkhan <mohdshahidkhan1987@gmail.com>
Authored: Sun Mar 19 23:21:40 2017 +0530
Committer: Venkata Ramana G <ramana.gollamudi@huawei.com>
Committed: Mon Mar 20 20:27:45 2017 +0530

----------------------------------------------------------------------
 .../src/test/resources/badrecords/dummy.csv     |   4 +
 .../NumericDimensionBadRecordTest.scala         | 161 +++++++++++++++++++
 .../impl/DictionaryFieldConverterImpl.java      |  20 ++-
 .../converter/impl/FieldEncoderFactory.java     |   2 +-
 .../converter/impl/RowConverterImpl.java        |   2 +-
 5 files changed, 184 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/badrecords/dummy.csv b/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
new file mode 100644
index 0000000..39bf37f
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/badrecords/dummy.csv
@@ -0,0 +1,4 @@
+name,dob,weight
+\N,\N,1
+,,xfds
+"","",""
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
new file mode 100644
index 0000000..1301f5d
--- /dev/null
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/datatype/NumericDimensionBadRecordTest.scala
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.badrecordloger
+
+import java.io.File
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+import org.apache.spark.sql.hive.HiveContext
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test Class for detailed query on timestamp dataDataTypes
+ *
+ *
+ */
+class NumericDimensionBadRecordTest extends QueryTest with BeforeAndAfterAll {
+  var hiveContext: HiveContext = _
+
+  override def beforeAll {
+    try {
+      sql("drop table IF EXISTS intDataType")
+      sql("drop table IF EXISTS longDataType")
+      sql("drop table IF EXISTS doubleDataType")
+      sql("drop table IF EXISTS floatDataType")
+      sql("drop table IF EXISTS bigDecimalDataType")
+      sql("drop table IF EXISTS stringDataType")
+      CarbonProperties.getInstance()
+        .addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+          new File("./target/test/badRecords")
+            .getCanonicalPath)
+      CarbonProperties.getInstance()
+        .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+      var csvFilePath = ""
+
+      // 1. bad record int DataType dimension
+      sql("create table intDataType(name String, dob timestamp, weight int)" +
+          " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table intDataType options " +
+          "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+      // 2. bad record long DataType dimension
+      sql("create table longDataType(name String, dob timestamp, weight long)" +
+          " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table longDataType options " +
+          "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+      // 3. bad record double DataType dimension
+      sql("create table doubleDataType(name String, dob timestamp, weight double)" +
+          " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table doubleDataType options " +
+          "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+      // 4. bad record float DataType dimension
+      sql("create table floatDataType(name String, dob timestamp, weight float)" +
+          " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table floatDataType options " +
+          "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+      // 5. bad record decimal DataType dimension
+      sql("create table bigDecimalDataType(name String, dob timestamp, weight decimal(3,1))" +
+          " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table bigDecimalDataType options " +
+          "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+      // 6. bad record string DataType dimension
+      sql("create table stringDataType(name String, dob timestamp, weight String)" +
+          " STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='weight')")
+      csvFilePath = s"$resourcesPath/badrecords/dummy.csv"
+      sql("LOAD DATA local inpath '" + csvFilePath + "' INTO table stringDataType options " +
+          "('BAD_RECORDS_LOGGER_ENABLE'='true','BAD_RECORDS_ACTION'='IGNORE')");
+
+    } catch {
+      case x: Throwable => {
+        System.out.println(x.getMessage)
+        CarbonProperties.getInstance()
+          .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+      }
+    }
+  }
+
+   test("select count(*) from intDataType") {
+    checkAnswer(
+      sql("select count(*) from intDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from longDataType") {
+    checkAnswer(
+      sql("select count(*) from longDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+
+  test("select count(*) from doubleDataType") {
+    checkAnswer(
+      sql("select count(*) from doubleDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from floatDataType") {
+    checkAnswer(
+      sql("select count(*) from floatDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from bigDecimalDataType") {
+    checkAnswer(
+      sql("select count(*) from bigDecimalDataType"),
+      Seq(Row(2)
+      )
+    )
+  }
+
+  test("select count(*) from stringDataType") {
+    checkAnswer(
+      sql("select count(*) from stringDataType"),
+      Seq(Row(3)
+      )
+    )
+  }
+
+  override def afterAll {
+    sql("drop table IF EXISTS intDataType")
+    sql("drop table IF EXISTS longDataType")
+    sql("drop table IF EXISTS doubleDataType")
+    sql("drop table IF EXISTS floatDataType")
+    sql("drop table IF EXISTS bigDecimalDataType")
+    sql("drop table IF EXISTS stringDataType")
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
index ae5cadb..62073e2 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/DictionaryFieldConverterImpl.java
@@ -55,14 +55,17 @@ public class DictionaryFieldConverterImpl extends AbstractDictionaryFieldConvert
 
   private DictionaryMessage dictionaryMessage;
 
+  private boolean isEmptyBadRecord;
+
   public DictionaryFieldConverterImpl(DataField dataField,
       Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache,
       CarbonTableIdentifier carbonTableIdentifier, String nullFormat, int index,
       DictionaryClient client, boolean useOnePass, String storePath, boolean tableInitialize,
-      Map<Object, Integer> localCache) throws IOException {
+      Map<Object, Integer> localCache, boolean isEmptyBadRecord) throws IOException {
     this.index = index;
     this.carbonDimension = (CarbonDimension) dataField.getColumn();
     this.nullFormat = nullFormat;
+    this.isEmptyBadRecord = isEmptyBadRecord;
     DictionaryColumnUniqueIdentifier identifier =
         new DictionaryColumnUniqueIdentifier(carbonTableIdentifier,
             dataField.getColumn().getColumnIdentifier(), dataField.getColumn().getDataType());
@@ -94,8 +97,19 @@ public class DictionaryFieldConverterImpl extends AbstractDictionaryFieldConvert
   @Override public void convert(CarbonRow row, BadRecordLogHolder logHolder)
       throws CarbonDataLoadingException {
     try {
-      String parsedValue = DataTypeUtil.parseValue(row.getString(index), carbonDimension);
-      if (null == parsedValue || parsedValue.equals(nullFormat)) {
+      String dimensionValue = row.getString(index);
+      if (dimensionValue == null || dimensionValue.equals(nullFormat)) {
+        dimensionValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL;
+      }
+      String parsedValue = DataTypeUtil.parseValue(dimensionValue, carbonDimension);
+      if (null == parsedValue) {
+        if ((dimensionValue.length() > 0) || (dimensionValue.length() == 0 && isEmptyBadRecord)) {
+          String dataType = carbonDimension.getDataType().getName();
+          logHolder.setReason(
+              "The value " + " \"" + dimensionValue + "\"" + " with column name " + carbonDimension
+                  .getColName() + " and column data type " + dataType + " is not a valid "
+                  + dataType + " type.");
+        }
         row.update(CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY, index);
       } else {
         row.update(dictionaryGenerator.getOrGenerateKey(parsedValue), index);

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
index 660f256..158f3f0 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/FieldEncoderFactory.java
@@ -75,7 +75,7 @@ public class FieldEncoderFactory {
       } else if (dataField.getColumn().hasEncoding(Encoding.DICTIONARY) &&
           !dataField.getColumn().isComplex()) {
         return new DictionaryFieldConverterImpl(dataField, cache, carbonTableIdentifier, nullFormat,
-            index, client, useOnePass, storePath, tableInitialize, localCache);
+            index, client, useOnePass, storePath, tableInitialize, localCache, isEmptyBadRecord);
       } else if (dataField.getColumn().isComplex()) {
         return new ComplexFieldConverterImpl(
             createComplexType(dataField, cache, carbonTableIdentifier,

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3bc9152e/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
index 3ba7bdf..2471314 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/RowConverterImpl.java
@@ -155,7 +155,7 @@ public class RowConverterImpl implements RowConverter {
       fieldConverters[i].convert(row, logHolder);
       if (!logHolder.isLogged() && logHolder.isBadRecordNotAdded()) {
         if (badRecordLogger.isDataLoadFail()) {
-          String error = "Data load failed due to bad bad record: " + logHolder.getReason();
+          String error = "Data load failed due to bad record: " + logHolder.getReason();
           throw new CarbonDataLoadingException(error);
         }
         badRecordLogger.addBadRecordsToBuilder(copy.getData(), logHolder.getReason());


Mime
View raw message