Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 733AA108DD for ; Fri, 23 Oct 2015 17:54:22 +0000 (UTC) Received: (qmail 45711 invoked by uid 500); 23 Oct 2015 17:54:16 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 45667 invoked by uid 500); 23 Oct 2015 17:54:16 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 45656 invoked by uid 99); 23 Oct 2015 17:54:16 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 23 Oct 2015 17:54:16 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id EF6D4E01F5; Fri, 23 Oct 2015 17:54:15 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: harisankar@apache.org To: commits@hive.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-12084 : Hive queries with ORDER BY and large LIMIT fails with OutOfMemoryError Java heap space (Hari Subramaniyan, reviewed by Laljo John Pullokkaran) Date: Fri, 23 Oct 2015 17:54:15 +0000 (UTC) Repository: hive Updated Branches: refs/heads/branch-1 e35b908ea -> f4020cfce HIVE-12084 : Hive queries with ORDER BY and large LIMIT fails with OutOfMemoryError Java heap space (Hari Subramaniyan, reviewed by Laljo John Pullokkaran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f4020cfc Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f4020cfc Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f4020cfc Branch: refs/heads/branch-1 Commit: f4020cfce034cdcf91e362c106c15508b0ac16d8 Parents: e35b908 Author: Hari Subramaniyan Authored: Fri Oct 23 10:52:46 2015 -0700 Committer: Hari Subramaniyan Committed: Fri Oct 23 10:53:53 2015 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/exec/TopNHash.java | 2 +- ql/src/test/queries/clientpositive/topn.q | 13 ++++++ ql/src/test/results/clientpositive/topn.q.out | 42 ++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f4020cfc/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java index 484006a..8859add 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TopNHash.java @@ -104,7 +104,7 @@ public class TopNHash { } // limit * 64 : compensation of arrays for key/value/hashcodes - this.threshold = (long) (memUsage * Runtime.getRuntime().maxMemory()) - topN * 64; + this.threshold = (long) (memUsage * Runtime.getRuntime().freeMemory()) - topN * 64L; if (threshold < 0) { return; } http://git-wip-us.apache.org/repos/asf/hive/blob/f4020cfc/ql/src/test/queries/clientpositive/topn.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/topn.q b/ql/src/test/queries/clientpositive/topn.q new file mode 100644 index 0000000..4821b5a --- /dev/null +++ b/ql/src/test/queries/clientpositive/topn.q @@ -0,0 +1,13 @@ +CREATE TABLE `sample_07` ( `code` string , `description` string , `total_emp` int , `salary` int ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TextFile; +set hive.limit.pushdown.memory.usage=0.9999999; + +select * from sample_07 order by salary LIMIT 999999999; + +SELECT * FROM ( +SELECT *, rank() over(PARTITION BY code ORDER BY salary DESC) as rank +FROM sample_07 +) ranked_claim +WHERE ranked_claim.rank < 2 +ORDER BY code; + +select sum(total_emp) over(partition by salary+salary order by code) from sample_07 limit 9999999; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/f4020cfc/ql/src/test/results/clientpositive/topn.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/topn.q.out b/ql/src/test/results/clientpositive/topn.q.out new file mode 100644 index 0000000..4846d8f --- /dev/null +++ b/ql/src/test/results/clientpositive/topn.q.out @@ -0,0 +1,42 @@ +PREHOOK: query: CREATE TABLE `sample_07` ( `code` string , `description` string , `total_emp` int , `salary` int ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TextFile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sample_07 +POSTHOOK: query: CREATE TABLE `sample_07` ( `code` string , `description` string , `total_emp` int , `salary` int ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TextFile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sample_07 +PREHOOK: query: select * from sample_07 order by salary LIMIT 999999999 +PREHOOK: type: QUERY +PREHOOK: Input: default@sample_07 +#### A masked pattern was here #### +POSTHOOK: query: select * from sample_07 order by salary LIMIT 999999999 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sample_07 +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM ( +SELECT *, rank() over(PARTITION BY code ORDER BY salary DESC) as rank +FROM sample_07 +) ranked_claim +WHERE ranked_claim.rank < 2 +ORDER BY code +PREHOOK: type: QUERY +PREHOOK: Input: default@sample_07 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( +SELECT *, rank() over(PARTITION BY code ORDER BY salary DESC) as rank +FROM sample_07 +) ranked_claim +WHERE ranked_claim.rank < 2 +ORDER BY code +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sample_07 +#### A masked pattern was here #### +PREHOOK: query: select sum(total_emp) over(partition by salary+salary order by code) from sample_07 limit 9999999 +PREHOOK: type: QUERY +PREHOOK: Input: default@sample_07 +#### A masked pattern was here #### +POSTHOOK: query: select sum(total_emp) over(partition by salary+salary order by code) from sample_07 limit 9999999 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sample_07 +#### A masked pattern was here ####