asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From buyin...@apache.org
Subject asterixdb git commit: Expected Hash Table footprint size calculation fix
Date Fri, 12 May 2017 16:31:56 GMT
Repository: asterixdb
Updated Branches:
  refs/heads/master b13c991f5 -> 5a02c1576


Expected Hash Table footprint size calculation fix

 - Fix the expected hash table footprint size calculation
   based on the cardinality. An operand was missing.

Change-Id: I9eb658a189fcf3d68978f627959c67d0c2641a29
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1738
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Yingyi Bu <buyingyi@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/5a02c157
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/5a02c157
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/5a02c157

Branch: refs/heads/master
Commit: 5a02c157686f0b4bb5d6210f314abe1560b060fd
Parents: b13c991
Author: Taewoo Kim <wangsaeu@yahoo.com>
Authored: Thu May 11 22:25:57 2017 -0700
Committer: Yingyi Bu <buyingyi@gmail.com>
Committed: Fri May 12 09:31:37 2017 -0700

----------------------------------------------------------------------
 .../resources/benchmarks/tpch/queries/q7.sqlpp  |  2 -
 .../benchmarks/tpch/queries/q7_variant.sqlpp    | 57 ++++++++++++++++++++
 ...timizedHybridHashJoinOperatorDescriptor.java |  4 +-
 .../structures/SimpleSerializableHashTable.java |  2 +-
 .../ExternalGroupOperatorDescriptorTest.java    | 20 +++----
 5 files changed, 70 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5a02c157/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp
index bc6a7a6..9a96ea4 100644
--- a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp
+++ b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7.sqlpp
@@ -17,8 +17,6 @@
  * under the License.
  */
 
-// Error: sporadically dead node.
-
 USE tpch;
 
 WITH q7_volume_shipping_tmp AS

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5a02c157/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7_variant.sqlpp
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7_variant.sqlpp b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7_variant.sqlpp
new file mode 100644
index 0000000..5e71471
--- /dev/null
+++ b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/queries/q7_variant.sqlpp
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE tpch;
+
+WITH q7_volume_shipping_tmp AS
+(
+    SELECT n1.n_name AS supp_nation,
+           n2.n_name AS cust_nation,
+           n1.n_nationkey AS s_nationkey,
+           n2.n_nationkey AS c_nationkey
+    FROM  Nation as n1,
+          Nation as n2
+    WHERE (n1.n_name='FRANCE' AND n2.n_name='GERMANY') OR (n1.n_name='GERMANY' AND n2.n_name='FRANCE')
+)
+
+SELECT supp_nation, cust_nation, l_year, sum(volume) AS revenue
+FROM
+  (
+    SELECT t.supp_nation, t.cust_nation, GET_YEAR(l3.l_shipdate) AS l_year,
+           l3.l_extendedprice * (1 - l3.l_discount) AS volume
+    FROM q7_volume_shipping_tmp t JOIN
+        (
+           SELECT l2.l_shipdate, l2.l_extendedprice, l2.l_discount, l2.c_nationkey, s.s_nationkey
+           FROM Supplier s JOIN
+             (
+              SELECT l1.l_shipdate, l1.l_extendedprice, l1.l_discount, l1.l_suppkey, c.c_nationkey
+              FROM Customer c JOIN
+                 (
+                   SELECT l.l_shipdate, l.l_extendedprice, l.l_discount, l.l_suppkey, o.o_custkey
+                   FROM Orders o
+                   JOIN LineItem l ON o.o_orderkey = l.l_orderkey AND l.l_shipdate >= '1995-01-01'
+                        AND l.l_shipdate <= '1996-12-31'
+               ) l1 ON c.c_custkey = l1.o_custkey
+
+            ) l2 ON s.s_suppkey = l2.l_suppkey
+         ) l3 ON t.c_nationkey = l3.c_nationkey AND t.s_nationkey = l3.s_nationkey
+   ) shipping
+GROUP BY supp_nation, cust_nation, l_year
+ORDER BY supp_nation, cust_nation, l_year;
+

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5a02c157/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index f699075..c44c583 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -480,8 +480,8 @@ public class OptimizedHybridHashJoinOperatorDescriptor extends AbstractOperatorD
                             hashFunctionGeneratorFactories).createPartitioner(level);
 
                     int frameSize = ctx.getInitialFrameSize();
-                    long buildPartSize = buildSideReader.getFileSize() / frameSize;
-                    long probePartSize = probeSideReader.getFileSize() / frameSize;
+                    long buildPartSize = (long) Math.ceil((double) buildSideReader.getFileSize() / (double) frameSize);
+                    long probePartSize = (long) Math.ceil((double) probeSideReader.getFileSize() / (double) frameSize);
                     int beforeMax = Math.max(buildSizeInTuple, probeSizeInTuple);
 
                     if (LOGGER.isLoggable(Level.FINE)) {

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5a02c157/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java
index 5b7d364..b1d1f27 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/SimpleSerializableHashTable.java
@@ -442,7 +442,7 @@ public class SimpleSerializableHashTable implements ISerializableTable {
      *         expected the byte size of the hash table
      */
     public static long getExpectedTableFrameCount(long tableSize, int frameSize) {
-        long numberOfHeaderFrame = (long) (Math.ceil((double) tableSize * 2 / (double) frameSize));
+        long numberOfHeaderFrame = (long) (Math.ceil((double) tableSize * 2 * getUnitSize() / (double) frameSize));
         long numberOfContentFrame = (long) (Math
                 .ceil(((double) getNumberOfEntryInSlot() * 2 * getUnitSize() * tableSize) / (double) frameSize));
         return numberOfHeaderFrame + numberOfContentFrame;

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/5a02c157/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java
index 392aab5..794ff98 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupOperatorDescriptorTest.java
@@ -42,7 +42,7 @@ public class ExternalGroupOperatorDescriptorTest {
         int frameSize = 256;
         int resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 9);
+        Assert.assertTrue(resultCardinality == 10);
 
         // Sets the frame size to 128KB.
         frameSize = 128 * 1024;
@@ -51,31 +51,31 @@ public class ExternalGroupOperatorDescriptorTest {
         memoryBudgetInBytes = 1024 * 1024;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 19660);
+        Assert.assertTrue(resultCardinality == 20388);
 
         // Test 3: memory size: 100 MB, frame size: 128 KB, 1 column group-by
         memoryBudgetInBytes = 1024 * 1024 * 100;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 1937883);
+        Assert.assertTrue(resultCardinality == 2016724);
 
         // Test 4: memory size: 1 GB, frame size: 128 KB, 1 column group-by
         memoryBudgetInBytes = 1024 * 1024 * 1024;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 19841178);
+        Assert.assertTrue(resultCardinality == 20649113);
 
         // Test 5: memory size: 10 GB, frame size: 128 KB, 1 column group-by
         memoryBudgetInBytes = 1024 * 1024 * 1024 * 10L;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 198409112);
+        Assert.assertTrue(resultCardinality == 206489044);
 
         // Test 6: memory size: 100 GB, frame size: 128 KB, 1 column group-by
         memoryBudgetInBytes = 1024 * 1024 * 1024 * 100L;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 1962753871);
+        Assert.assertTrue(resultCardinality == 2045222521);
 
         // Test 7: memory size: 1 TB, frame size: 128 KB, 1 column group-by
         // The cardinality will be set to Integer.MAX_VALUE in this case since the budget is too huge.
@@ -90,28 +90,28 @@ public class ExternalGroupOperatorDescriptorTest {
         numberOfGroupByColumns = 2;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 16681);
+        Assert.assertTrue(resultCardinality == 17825);
 
         // Test 9: memory size: 1 MB, frame size: 128 KB, 3 columns group-by
         memoryBudgetInBytes = 1024 * 1024;
         numberOfGroupByColumns = 3;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 15176);
+        Assert.assertTrue(resultCardinality == 16227);
 
         // Test 10: memory size: 1 MB, frame size: 128 KB, 4 columns group-by
         memoryBudgetInBytes = 1024 * 1024;
         numberOfGroupByColumns = 4;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 13878);
+        Assert.assertTrue(resultCardinality == 14563);
 
         // Test 11: memory size: 32 MB, frame size: 128 KB, 2 columns group-by
         memoryBudgetInBytes = 1024 * 1024 * 32L;
         numberOfGroupByColumns = 4;
         resultCardinality = (int) PA.invokeMethod(eGByOp, "calculateGroupByTableCardinality(long,int,int)",
                 memoryBudgetInBytes, numberOfGroupByColumns, frameSize);
-        Assert.assertTrue(resultCardinality == 408503);
+        Assert.assertTrue(resultCardinality == 441913);
     }
 
 }


Mime
View raw message