carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kumarvisha...@apache.org
Subject carbondata git commit: [CARBONDATA-2615][32K] Support page size less than 32000 in CarbondataV3
Date Thu, 21 Jun 2018 05:30:52 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 2ea3b2dc5 -> 091a28bf8


[CARBONDATA-2615][32K] Support page size less than 32000 in CarbondataV3

Because CarbonData supports very long strings, a column page holding
32000 rows of sufficiently long values can exceed 2GB. This change
therefore allows a V3 column page to hold fewer than 32000 rows.

This closes #2383


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/091a28bf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/091a28bf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/091a28bf

Branch: refs/heads/master
Commit: 091a28bf833a5296dd3878ddb11b243f7f37a8fc
Parents: 2ea3b2d
Author: xuchuanyin <xuchuanyin@hust.edu.cn>
Authored: Wed Jun 20 19:07:03 2018 +0800
Committer: kumarvishal09 <kumarvishal1802@gmail.com>
Committed: Thu Jun 21 11:00:02 2018 +0530

----------------------------------------------------------------------
 .../testsuite/dataload/TestLoadDataGeneral.scala    | 16 ++++++++++++++++
 .../store/CarbonFactDataHandlerColumnar.java        |  7 ++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
index 688928f..8b51090 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
@@ -259,6 +259,22 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
       CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS,
       originStatus)
   }
+
+  test("test data loading with page size less than 32000") {
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.BLOCKLET_SIZE, "16000")
+
+    val testData = s"$resourcesPath/sample.csv"
+    sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
+    checkAnswer(
+      sql("SELECT COUNT(*) FROM loadtest"),
+      Seq(Row(6))
+    )
+
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.BLOCKLET_SIZE,
+      CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL)
+  }
+
   override def afterEach {
     sql("DROP TABLE if exists loadtest")
     sql("drop table if exists invalidMeasures")

http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index c0acadd..5fe3261 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -371,8 +371,13 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler {
     this.pageSize = Integer.parseInt(CarbonProperties.getInstance()
         .getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
             CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
+    // support less than 32000 rows in one page, because we support super long string,
+    // if it is long enough, a column page with 32000 rows will exceed 2GB
     if (version == ColumnarFormatVersion.V3) {
-      this.pageSize = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
+      this.pageSize =
+          pageSize < CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT ?
+              pageSize :
+              CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
     }
     LOGGER.info("Number of rows per column blocklet " + pageSize);
     dataRows = new ArrayList<>(this.pageSize);


Mime
View raw message