carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject [03/42] carbondata git commit: [CARBONDATA-909] Added option to specify single pass load in data frame
Date Thu, 15 Jun 2017 11:50:08 GMT
[CARBONDATA-909] Added option to specify single pass load in data frame


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/38a51449
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/38a51449
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/38a51449

Branch: refs/heads/branch-1.1
Commit: 38a51449ba9a35706bae2a78628a9654f0634960
Parents: 01048f8
Author: Sanoj MG <sanoj.george.dev@gmail.com>
Authored: Wed Apr 12 22:32:51 2017 +0400
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Thu Jun 15 12:44:00 2017 +0530

----------------------------------------------------------------------
 .../testsuite/dataload/TestLoadDataFrame.scala  | 32 ++++++++++++++++++++
 .../spark/CarbonDataFrameWriter.scala           |  3 +-
 2 files changed, 34 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/38a51449/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataFrame.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataFrame.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataFrame.scala
index f50620f..9179c08 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataFrame.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataFrame.scala
@@ -59,6 +59,8 @@ class TestLoadDataFrame extends QueryTest with BeforeAndAfterAll {
     sql("DROP TABLE IF EXISTS carbon5")
     sql("DROP TABLE IF EXISTS carbon6")
     sql("DROP TABLE IF EXISTS carbon7")
+    sql("DROP TABLE IF EXISTS carbon8")
+    sql("DROP TABLE IF EXISTS carbon9")
   }
 
 
@@ -167,6 +169,36 @@ class TestLoadDataFrame extends QueryTest with BeforeAndAfterAll {
     )
   }
 
+  test("test load dataframe with single pass enabled") {
+    // save dataframe to carbon file
+    df.write
+      .format("carbondata")
+      .option("tableName", "carbon8")
+      .option("tempCSV", "false")
+      .option("single_pass", "true")
+      .option("compress", "false")
+      .mode(SaveMode.Overwrite)
+      .save()
+    checkAnswer(
+      sql("select count(*) from carbon8 where c3 > 500"), Row(500)
+    )
+  }
+
+  test("test load dataframe with single pass disabled") {
+    // save dataframe to carbon file
+    df.write
+      .format("carbondata")
+      .option("tableName", "carbon9")
+      .option("tempCSV", "true")
+      .option("single_pass", "false")
+      .option("compress", "false")
+      .mode(SaveMode.Overwrite)
+      .save()
+    checkAnswer(
+      sql("select count(*) from carbon9 where c3 > 500"), Row(500)
+    )
+  }
+
   override def afterAll {
     dropTable
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/38a51449/integration/spark/src/main/scala/org/apache/carbondata/spark/CarbonDataFrameWriter.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/CarbonDataFrameWriter.scala
b/integration/spark/src/main/scala/org/apache/carbondata/spark/CarbonDataFrameWriter.scala
index 36e2440..0d1b1df 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/CarbonDataFrameWriter.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/CarbonDataFrameWriter.scala
@@ -193,7 +193,8 @@ class CarbonDataFrameWriter(val dataFrame: DataFrame) {
     s"""
           LOAD DATA INPATH '$csvFolder'
           INTO TABLE ${options.dbName}.${options.tableName}
-          OPTIONS ('FILEHEADER' = '${dataFrame.columns.mkString(",")}')
+          OPTIONS ('FILEHEADER' = '${dataFrame.columns.mkString(",")}',
+          'SINGLE_PASS' = '${options.singlePass}')
       """
   }
 


Mime
View raw message