carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject carbondata git commit: [CARBONDATA-1291]:carbonData query performance improvement when number of carbon blocks are high
Date Fri, 15 Sep 2017 04:14:55 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 642b4bf73 -> 940f4d5e6


[CARBONDATA-1291]:carbonData query performance improvement when number of carbon blocks are
high

Limit query performance is slow when one load has around 8400 carbondata files using
Spark distribution.
This issue occurs when the number of blocks is high: in that case, for each block
the delete delta files are listed, which is an expensive operation.
Solution: check for delete delta files only if an IUD (insert/update/delete) operation has been performed on the table.

This closes #1324


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/940f4d5e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/940f4d5e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/940f4d5e

Branch: refs/heads/master
Commit: 940f4d5e62c05bd47b192690c67b4970cad38466
Parents: 642b4bf
Author: kushalsaha <kushalsaha1988@gmail.com>
Authored: Tue Sep 5 17:10:30 2017 +0530
Committer: Ravindra Pesala <ravi.pesala@gmail.com>
Committed: Fri Sep 15 09:44:38 2017 +0530

----------------------------------------------------------------------
 .../carbondata/hadoop/CarbonInputFormat.java     | 19 ++++++++++---------
 .../hadoop/api/CarbonTableInputFormat.java       | 19 ++++++++++---------
 2 files changed, 20 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
index fb3a637..4e8591e 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
@@ -460,21 +460,22 @@ public class CarbonInputFormat<T> extends FileInputFormat<Void,
T> {
       for (DataRefNode dataRefNode : dataRefNodes) {
         BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
         TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
+        String[] deleteDeltaFilePath = null;
         if (isIUDTable) {
           // In case IUD is not performed in this table avoid searching for
           // invalidated blocks.
           if (CarbonUtil
-              .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(),
-                  invalidBlockVOForSegmentId, updateStatusManager)) {
+                  .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(),
+                          invalidBlockVOForSegmentId, updateStatusManager)) {
             continue;
           }
-        }
-        String[] deleteDeltaFilePath = null;
-        try {
-          deleteDeltaFilePath =
-              updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath());
-        } catch (Exception e) {
-          throw new IOException(e);
+          // When iud is done then only get delete delta files for a block
+          try {
+            deleteDeltaFilePath =
+                    updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath());
+          } catch (Exception e) {
+            throw new IOException(e);
+          }
         }
         result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()),
             tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(),

http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index f271517..dcc75bd 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -448,21 +448,22 @@ public class CarbonTableInputFormat<T> extends FileInputFormat<Void,
T> {
         invalidBlockVOForSegmentId =
             updateStatusManager.getInvalidTimestampRange(inputSplit.getSegmentId());
       }
+      String[] deleteDeltaFilePath = null;
       if (isIUDTable) {
         // In case IUD is not performed in this table avoid searching for
         // invalidated blocks.
         if (CarbonUtil
-            .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(),
-                invalidBlockVOForSegmentId, updateStatusManager)) {
+                .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(),
+                        invalidBlockVOForSegmentId, updateStatusManager)) {
           continue;
         }
-      }
-      String[] deleteDeltaFilePath = null;
-      try {
-        deleteDeltaFilePath =
-            updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString());
-      } catch (Exception e) {
-        throw new IOException(e);
+        // When iud is done then only get delete delta files for a block
+        try {
+          deleteDeltaFilePath =
+                  updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString());
+        } catch (Exception e) {
+          throw new IOException(e);
+        }
       }
       inputSplit.setDeleteDeltaFiles(deleteDeltaFilePath);
       result.add(inputSplit);


Mime
View raw message