carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject [20/50] [abbrv] carbondata git commit: [CARBONDATA-2722] [CARBONDATA-2721] JsonWriter issue fixes
Date Wed, 18 Jul 2018 02:19:53 GMT
[CARBONDATA-2722] [CARBONDATA-2721] JsonWriter issue fixes

[CARBONDATA-2722][SDK] [JsonWriter] NPE when schema and data are not of same length or Data
is null.

problem: Null data is not handled in the json object to carbon row conversion.

solution: add a null check when object is fetched from json map.

[CARBONDATA-2721][SDK] [JsonWriter] Json writer is writing only first element of an array
and discarding the rest of the elements.

problem: converting json object to carbon row array object is based on array children count
, instead of array element count.
Hence as array will always have one children. only one element is filled.

solution: use array element count instead of array children count

This closes #2485


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/653efee0
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/653efee0
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/653efee0

Branch: refs/heads/carbonstore
Commit: 653efee0283701d928562379f91e5df8fec73c24
Parents: 637a974
Author: ajantha-bhat <ajanthabhat@gmail.com>
Authored: Mon Jul 9 18:45:30 2018 +0530
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Fri Jul 13 20:49:56 2018 +0530

----------------------------------------------------------------------
 .../jsonFiles/data/PrimitiveTypeWithNull.json   |  4 +
 .../jsonFiles/data/StructOfAllTypes.json        |  2 +-
 .../jsonFiles/data/allPrimitiveType.json        |  2 +-
 ...tNonTransactionalCarbonTableJsonWriter.scala | 88 +++++++++++++++++---
 .../loading/parser/impl/JsonRowParser.java      | 51 ++++++++----
 5 files changed, 115 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json
b/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json
new file mode 100644
index 0000000..a5cd3d7
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/jsonFiles/data/PrimitiveTypeWithNull.json
@@ -0,0 +1,4 @@
+{
+	"stringField": null,
+	"intField": 26
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json
b/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json
index 9806325..3beab07 100644
--- a/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json
+++ b/integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json
@@ -5,7 +5,7 @@
 		"longField": 12345678,
 		"doubleField": 123400.78,
 		"boolField": true,
-		"FloorNum": [ 1, 2],
+		"FloorNum": [1,2,3,4,5,6],
 		"FloorString": [ "abc", "def"],
 		"FloorLong": [ 1234567, 2345678],
 		"FloorDouble": [ 1.0, 2.0, 33.33],

http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json
b/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json
index 86648c3..6d81ec7 100644
--- a/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json
+++ b/integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json
@@ -1,5 +1,5 @@
 {
-	"stringField": "ajantha",
+	"stringField": "ajantha\"bhat\"",
 	"intField": 26,
 	"shortField": 26,
 	"longField": 1234567,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala
index 299c966..ff5c062 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableJsonWriter.scala
@@ -32,7 +32,6 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.metadata.datatype.DataTypes
 import org.apache.carbondata.core.util.CarbonProperties
 import org.apache.carbondata.sdk.file._
-
 import scala.collection.JavaConverters._
 
 class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with BeforeAndAfterAll
{
@@ -94,7 +93,7 @@ class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with BeforeAnd
   private def writeCarbonFileFromJsonRowInput(jsonRow: String,
       carbonSchema: Schema) = {
     try {
-      var options: util.Map[String, String] = Map("bAd_RECords_action" -> "FAIL").asJava
+      var options: util.Map[String, String] = Map("bAd_RECords_action" -> "FAIL", "quotechar"
-> "\"").asJava
       val writer = CarbonWriter.builder
         .outputPath(writerPath).isTransactionalTable(false)
         .uniqueIdentifier(System.currentTimeMillis())
@@ -138,7 +137,7 @@ class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with
BeforeAnd
       s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
          |'$writerPath' """.stripMargin)
     checkAnswer(sql("select * from sdkOutputTable"),
-      Seq(Row("ajantha",
+      Seq(Row("ajantha\"bhat\"",
         26,
         26,
         1234567,
@@ -266,21 +265,86 @@ class TestNonTransactionalCarbonTableJsonWriter extends QueryTest with
BeforeAnd
     sql(
       s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
          |'$writerPath' """.stripMargin)
+    assert(sql("select * from sdkOutputTable").collectAsList().toString.equals(
+      "[[[bob,10,12345678,123400.78,true,WrappedArray(1, 2, 3, 4, 5, 6),WrappedArray(abc,
def)," +
+      "WrappedArray(1234567, 2345678),WrappedArray(1.0, 2.0, 33.33),WrappedArray(true, false,
" +
+      "false, true)]]]"))
+    sql("DROP TABLE sdkOutputTable")
+    // drop table should not delete the files
+    assert(new File(writerPath).listFiles().length > 0)
+    FileUtils.deleteDirectory(new File(writerPath))
+  }
 
-    sql("select * from sdkOutputTable").show(false)
-    /*
-    * +--------------------------------------------------------------------+
-      |StructColumn                                                        |
-      +--------------------------------------------------------------------+
-      |[bob,10,12345678,123400.78,true,WrappedArray(1),WrappedArray(abc),  |
-      | WrappedArray(1234567),WrappedArray(1.0),WrappedArray(true)]        |
-      +--------------------------------------------------------------------+
-    * */
+  // test : One element as null
+  test("Read sdk writer Json output of primitive type with one element as null") {
+    FileUtils.deleteDirectory(new File(writerPath))
+    var dataPath: String = null
+    dataPath = resourcesPath + "/jsonFiles/data/PrimitiveTypeWithNull.json"
+    val fields = new Array[Field](2)
+    fields(0) = new Field("stringField", DataTypes.STRING)
+    fields(1) = new Field("intField", DataTypes.INT)
+    val jsonRow = readFromFile(dataPath)
+    writeCarbonFileFromJsonRowInput(jsonRow, new Schema(fields))
+    assert(new File(writerPath).exists())
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+    sql(
+      s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+         |'$writerPath' """.stripMargin)
+    checkAnswer(sql("select * from sdkOutputTable"),
+      Seq(Row(null,
+        26)))
+    sql("DROP TABLE sdkOutputTable")
+    // drop table should not delete the files
+    assert(new File(writerPath).listFiles().length > 0)
+    FileUtils.deleteDirectory(new File(writerPath))
+  }
 
+  // test : Schema length is greater than array length
+  test("Read Json output of primitive type with Schema length is greater than array length")
{
+    FileUtils.deleteDirectory(new File(writerPath))
+    var dataPath: String = null
+    dataPath = resourcesPath + "/jsonFiles/data/PrimitiveTypeWithNull.json"
+    val fields = new Array[Field](5)
+    fields(0) = new Field("stringField", DataTypes.STRING)
+    fields(1) = new Field("intField", DataTypes.INT)
+    fields(2) = new Field("shortField", DataTypes.SHORT)
+    fields(3) = new Field("longField", DataTypes.LONG)
+    fields(4) = new Field("doubleField", DataTypes.DOUBLE)
+    val jsonRow = readFromFile(dataPath)
+    writeCarbonFileFromJsonRowInput(jsonRow, new Schema(fields))
+    assert(new File(writerPath).exists())
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+    sql(
+      s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+         |'$writerPath' """.stripMargin)
+    checkAnswer(sql("select * from sdkOutputTable"),
+      Seq(Row(null, 26, null, null, null)))
     sql("DROP TABLE sdkOutputTable")
     // drop table should not delete the files
     assert(new File(writerPath).listFiles().length > 0)
     FileUtils.deleteDirectory(new File(writerPath))
   }
 
+  // test : Schema length is lesser than array length
+  test("Read Json output of primitive type with Schema length is lesser than array length")
{
+    FileUtils.deleteDirectory(new File(writerPath))
+    var dataPath: String = null
+    dataPath = resourcesPath + "/jsonFiles/data/allPrimitiveType.json"
+    val fields = new Array[Field](2)
+    fields(0) = new Field("stringField", DataTypes.STRING)
+    fields(1) = new Field("intField", DataTypes.INT)
+    val jsonRow = readFromFile(dataPath)
+    writeCarbonFileFromJsonRowInput(jsonRow, new Schema(fields))
+    assert(new File(writerPath).exists())
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+    sql(
+      s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+         |'$writerPath' """.stripMargin)
+    checkAnswer(sql("select * from sdkOutputTable"),
+      Seq(Row("ajantha\"bhat\"", 26)))
+    sql("DROP TABLE sdkOutputTable")
+    // drop table should not delete the files
+    assert(new File(writerPath).listFiles().length > 0)
+    FileUtils.deleteDirectory(new File(writerPath))
+  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/653efee0/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java
b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java
index 6b06d89..c4f6074 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/parser/impl/JsonRowParser.java
@@ -56,6 +56,9 @@ public class JsonRowParser implements RowParser {
       Map<String, Object> jsonNodeMap =
           objectMapper.readValue(jsonString, new TypeReference<Map<String, Object>>()
{
           });
+      if (jsonNodeMap == null) {
+        return null;
+      }
       return jsonToCarbonRecord(jsonNodeMap, dataFields);
     } catch (IOException e) {
       throw new IOException("Failed to parse Json String: " + e.getMessage());
@@ -66,9 +69,7 @@ public class JsonRowParser implements RowParser {
     List<Object> fields = new ArrayList<>();
     for (DataField dataField : dataFields) {
       Object field = jsonToCarbonObject(jsonNodeMap, dataField.getColumn());
-      if (field != null) {
-        fields.add(field);
-      }
+      fields.add(field);
     }
     // use this array object to form carbonRow
     return fields.toArray();
@@ -78,12 +79,16 @@ public class JsonRowParser implements RowParser {
     DataType type = column.getDataType();
     if (DataTypes.isArrayType(type)) {
       CarbonDimension carbonDimension = (CarbonDimension) column;
-      int size = carbonDimension.getNumberOfChild();
       ArrayList array = (ArrayList) jsonNodeMap.get(extractChildColumnName(column));
+      if ((array == null) || (array.size() == 0)) {
+        return null;
+      }
       // stored as array in carbonObject
-      Object[] arrayChildObjects = new Object[size];
-      for (int i = 0; i < size; i++) {
-        CarbonDimension childCol = carbonDimension.getListOfChildDimensions().get(i);
+      Object[] arrayChildObjects = new Object[array.size()];
+      for (int i = 0; i < array.size(); i++) {
+        // array column will have only one child, hence get(0).
+        // But data can have n elements, hence the loop.
+        CarbonDimension childCol = carbonDimension.getListOfChildDimensions().get(0);
         arrayChildObjects[i] = jsonChildElementToCarbonChildElement(array.get(i), childCol);
       }
       return new ArrayObject(arrayChildObjects);
@@ -92,33 +97,45 @@ public class JsonRowParser implements RowParser {
       int size = carbonDimension.getNumberOfChild();
       Map<String, Object> jsonMap =
           (Map<String, Object>) jsonNodeMap.get(extractChildColumnName(column));
+      if (jsonMap == null) {
+        return null;
+      }
       Object[] structChildObjects = new Object[size];
       for (int i = 0; i < size; i++) {
         CarbonDimension childCol = carbonDimension.getListOfChildDimensions().get(i);
         Object childObject =
             jsonChildElementToCarbonChildElement(jsonMap.get(extractChildColumnName(childCol)),
                 childCol);
-        if (childObject != null) {
-          structChildObjects[i] = childObject;
-        }
+        structChildObjects[i] = childObject;
       }
       return new StructObject(structChildObjects);
     } else {
       // primitive type
+      if (jsonNodeMap.get(extractChildColumnName(column)) == null) {
+        return null;
+      }
       return jsonNodeMap.get(extractChildColumnName(column)).toString();
     }
   }
 
   private Object jsonChildElementToCarbonChildElement(Object childObject,
       CarbonDimension column) {
+    if (childObject == null) {
+      return null;
+    }
     DataType type = column.getDataType();
     if (DataTypes.isArrayType(type)) {
-      int size = column.getNumberOfChild();
       ArrayList array = (ArrayList) childObject;
+      if (array.size() == 0) {
+        // handling empty array
+        return null;
+      }
       // stored as array in carbonObject
-      Object[] arrayChildObjects = new Object[size];
-      for (int i = 0; i < size; i++) {
-        CarbonDimension childCol = column.getListOfChildDimensions().get(i);
+      Object[] arrayChildObjects = new Object[array.size()];
+      for (int i = 0; i < array.size(); i++) {
+        // array column will have only one child, hence get(0).
+        // But data can have n elements, hence the loop.
+        CarbonDimension childCol = column.getListOfChildDimensions().get(0);
         arrayChildObjects[i] = jsonChildElementToCarbonChildElement(array.get(i), childCol);
       }
       return new ArrayObject(arrayChildObjects);
@@ -130,9 +147,7 @@ public class JsonRowParser implements RowParser {
         CarbonDimension childCol = column.getListOfChildDimensions().get(i);
         Object child = jsonChildElementToCarbonChildElement(
             childFieldsMap.get(extractChildColumnName(childCol)), childCol);
-        if (child != null) {
-          structChildObjects[i] = child;
-        }
+        structChildObjects[i] = child;
       }
       return new StructObject(structChildObjects);
     } else {
@@ -140,6 +155,7 @@ public class JsonRowParser implements RowParser {
       return childObject.toString();
     }
   }
+
   private static String extractChildColumnName(CarbonColumn column) {
     String columnName = column.getColName();
     if (columnName.contains(".")) {
@@ -153,5 +169,4 @@ public class JsonRowParser implements RowParser {
     }
     return columnName;
   }
-
 }


Mime
View raw message