Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 59B3D200D0F for ; Fri, 15 Sep 2017 06:14:57 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 57D5D1609CE; Fri, 15 Sep 2017 04:14:57 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 9C4321609CD for ; Fri, 15 Sep 2017 06:14:56 +0200 (CEST) Received: (qmail 70298 invoked by uid 500); 15 Sep 2017 04:14:55 -0000 Mailing-List: contact commits-help@carbondata.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@carbondata.apache.org Delivered-To: mailing list commits@carbondata.apache.org Received: (qmail 70289 invoked by uid 99); 15 Sep 2017 04:14:55 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Sep 2017 04:14:55 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 8E3D3F5593; Fri, 15 Sep 2017 04:14:55 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ravipesala@apache.org To: commits@carbondata.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: carbondata git commit: [CARBONDATA-1291]:carbonData query performance improvement when number of carbon blocks are high Date: Fri, 15 Sep 2017 04:14:55 +0000 (UTC) archived-at: Fri, 15 Sep 2017 04:14:57 -0000 Repository: carbondata Updated Branches: refs/heads/master 642b4bf73 -> 940f4d5e6 [CARBONDATA-1291]:carbonData query performance improvement when number of carbon blocks are high Limit query performance is slow when one load is having around 8400 carbondata files using Spark 
Distribution. This issue occurs when the number of blocks is high: for each block, the delete delta files are listed, which is an expensive operation. Solution: check for delete delta files only if IUD has been performed on the table. This closes #1324 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/940f4d5e Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/940f4d5e Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/940f4d5e Branch: refs/heads/master Commit: 940f4d5e62c05bd47b192690c67b4970cad38466 Parents: 642b4bf Author: kushalsaha Authored: Tue Sep 5 17:10:30 2017 +0530 Committer: Ravindra Pesala Committed: Fri Sep 15 09:44:38 2017 +0530 ---------------------------------------------------------------------- .../carbondata/hadoop/CarbonInputFormat.java | 19 ++++++++++--------- .../hadoop/api/CarbonTableInputFormat.java | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java index fb3a637..4e8591e 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java @@ -460,21 +460,22 @@ public class CarbonInputFormat extends FileInputFormat { for (DataRefNode dataRefNode : dataRefNodes) { BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode; TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo(); + String[] deleteDeltaFilePath = null; if (isIUDTable) { // In case IUD is not performed in this table avoid searching for 
// invalidated blocks. if (CarbonUtil - .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(), - invalidBlockVOForSegmentId, updateStatusManager)) { + .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(), + invalidBlockVOForSegmentId, updateStatusManager)) { continue; } - } - String[] deleteDeltaFilePath = null; - try { - deleteDeltaFilePath = - updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath()); - } catch (Exception e) { - throw new IOException(e); + // When iud is done then only get delete delta files for a block + try { + deleteDeltaFilePath = + updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath()); + } catch (Exception e) { + throw new IOException(e); + } } result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()), tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(), http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java index f271517..dcc75bd 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java @@ -448,21 +448,22 @@ public class CarbonTableInputFormat extends FileInputFormat { invalidBlockVOForSegmentId = updateStatusManager.getInvalidTimestampRange(inputSplit.getSegmentId()); } + String[] deleteDeltaFilePath = null; if (isIUDTable) { // In case IUD is not performed in this table avoid searching for // invalidated blocks. 
if (CarbonUtil - .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(), - invalidBlockVOForSegmentId, updateStatusManager)) { + .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(), + invalidBlockVOForSegmentId, updateStatusManager)) { continue; } - } - String[] deleteDeltaFilePath = null; - try { - deleteDeltaFilePath = - updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString()); - } catch (Exception e) { - throw new IOException(e); + // When iud is done then only get delete delta files for a block + try { + deleteDeltaFilePath = + updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString()); + } catch (Exception e) { + throw new IOException(e); + } } inputSplit.setDeleteDeltaFiles(deleteDeltaFilePath); result.add(inputSplit);