spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lix...@apache.org
Subject spark git commit: [SPARK-22800][TEST][SQL] Add a SSB query suite
Date Fri, 15 Dec 2017 17:56:26 GMT
Repository: spark
Updated Branches:
  refs/heads/master e58f27567 -> 9fafa8209


[SPARK-22800][TEST][SQL] Add a SSB query suite

## What changes were proposed in this pull request?
Add a test suite to ensure all the [SSB (Star Schema Benchmark)](https://www.cs.umb.edu/~poneil/StarSchemaB.PDF)
queries can be successfully analyzed, optimized and compiled without hitting the max iteration
threshold.

## How was this patch tested?
Added `SSBQuerySuite`.

Author: Takeshi Yamamuro <yamamuro@apache.org>

Closes #19990 from maropu/SPARK-22800.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9fafa820
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9fafa820
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9fafa820

Branch: refs/heads/master
Commit: 9fafa8209c51adc2a22b89aedf9af7b5e29e0059
Parents: e58f275
Author: Takeshi Yamamuro <yamamuro@apache.org>
Authored: Fri Dec 15 09:56:22 2017 -0800
Committer: gatorsmile <gatorsmile@gmail.com>
Committed: Fri Dec 15 09:56:22 2017 -0800

----------------------------------------------------------------------
 sql/core/src/test/resources/ssb/1.1.sql         |  6 ++
 sql/core/src/test/resources/ssb/1.2.sql         |  6 ++
 sql/core/src/test/resources/ssb/1.3.sql         |  6 ++
 sql/core/src/test/resources/ssb/2.1.sql         |  9 ++
 sql/core/src/test/resources/ssb/2.2.sql         |  9 ++
 sql/core/src/test/resources/ssb/2.3.sql         |  9 ++
 sql/core/src/test/resources/ssb/3.1.sql         | 10 +++
 sql/core/src/test/resources/ssb/3.2.sql         | 10 +++
 sql/core/src/test/resources/ssb/3.3.sql         | 12 +++
 sql/core/src/test/resources/ssb/3.4.sql         | 12 +++
 sql/core/src/test/resources/ssb/4.1.sql         | 11 +++
 sql/core/src/test/resources/ssb/4.2.sql         | 12 +++
 sql/core/src/test/resources/ssb/4.3.sql         | 12 +++
 .../org/apache/spark/sql/SSBQuerySuite.scala    | 87 ++++++++++++++++++++
 .../org/apache/spark/sql/TPCHQuerySuite.scala   |  2 -
 15 files changed, 211 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/1.1.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/1.1.sql b/sql/core/src/test/resources/ssb/1.1.sql
new file mode 100644
index 0000000..62da302
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/1.1.sql
@@ -0,0 +1,6 @@
+select sum(lo_extendedprice*lo_discount) as revenue
+	from lineorder, date
+	where lo_orderdate = d_datekey
+		and d_year = 1993
+		and lo_discount between 1 and 3
+		and lo_quantity < 25

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/1.2.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/1.2.sql b/sql/core/src/test/resources/ssb/1.2.sql
new file mode 100644
index 0000000..1657bfd
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/1.2.sql
@@ -0,0 +1,6 @@
+select sum(lo_extendedprice*lo_discount) as revenue
+	from lineorder, date
+	where lo_orderdate = d_datekey
+		and d_yearmonthnum = 199401
+		and lo_discount between 4 and 6
+		and lo_quantity between 26 and 35

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/1.3.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/1.3.sql b/sql/core/src/test/resources/ssb/1.3.sql
new file mode 100644
index 0000000..e9bbf51
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/1.3.sql
@@ -0,0 +1,6 @@
+select sum(lo_extendedprice*lo_discount) as revenue
+	from lineorder, date
+	where lo_orderdate = d_datekey
+		and d_weeknuminyear = 6 and d_year = 1994
+		and lo_discount between 5 and 7
+		and lo_quantity between 36 and 40

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/2.1.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/2.1.sql b/sql/core/src/test/resources/ssb/2.1.sql
new file mode 100644
index 0000000..00d4027
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/2.1.sql
@@ -0,0 +1,9 @@
+select sum(lo_revenue), d_year, p_brand1
+	from lineorder, date, part, supplier
+	where lo_orderdate = d_datekey
+		and lo_partkey = p_partkey
+		and lo_suppkey = s_suppkey
+		and p_category = 'MFGR#12'
+		and s_region = 'AMERICA'
+	group by d_year, p_brand1
+	order by d_year, p_brand1

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/2.2.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/2.2.sql b/sql/core/src/test/resources/ssb/2.2.sql
new file mode 100644
index 0000000..c53a925
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/2.2.sql
@@ -0,0 +1,9 @@
+select sum(lo_revenue), d_year, p_brand1
+	from lineorder, date, part, supplier
+	where lo_orderdate = d_datekey
+		and lo_partkey = p_partkey
+		and lo_suppkey = s_suppkey
+		and p_brand1 between 'MFGR#2221' and 'MFGR#2228'
+		and s_region = 'ASIA'
+	group by d_year, p_brand1
+	order by d_year, p_brand1

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/2.3.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/2.3.sql b/sql/core/src/test/resources/ssb/2.3.sql
new file mode 100644
index 0000000..e36530c
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/2.3.sql
@@ -0,0 +1,9 @@
+select sum(lo_revenue), d_year, p_brand1
+	from lineorder, date, part, supplier
+	where lo_orderdate = d_datekey
+		and lo_partkey = p_partkey
+		and lo_suppkey = s_suppkey
+		and p_brand1 = 'MFGR#2221'
+		and s_region = 'EUROPE'
+	group by d_year, p_brand1
+	order by d_year, p_brand1

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.1.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/3.1.sql b/sql/core/src/test/resources/ssb/3.1.sql
new file mode 100644
index 0000000..663ec3f
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/3.1.sql
@@ -0,0 +1,10 @@
+select c_nation, s_nation, d_year, sum(lo_revenue) as revenue
+	from customer, lineorder, supplier, date
+	where lo_custkey = c_custkey
+		and lo_suppkey = s_suppkey
+		and lo_orderdate = d_datekey
+		and c_region = 'ASIA'
+		and s_region = 'ASIA'
+		and d_year >= 1992 and d_year <= 1997
+	group by c_nation, s_nation, d_year
+	order by d_year asc, revenue desc

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.2.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/3.2.sql b/sql/core/src/test/resources/ssb/3.2.sql
new file mode 100644
index 0000000..e1eaf10
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/3.2.sql
@@ -0,0 +1,10 @@
+select c_city, s_city, d_year, sum(lo_revenue) as revenue
+	from customer, lineorder, supplier, date
+	where lo_custkey = c_custkey
+		and lo_suppkey = s_suppkey
+		and lo_orderdate = d_datekey
+		and c_nation = 'UNITED STATES'
+		and s_nation = 'UNITED STATES'
+		and d_year >= 1992 and d_year <= 1997
+	group by c_city, s_city, d_year
+	order by d_year asc, revenue desc

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.3.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/3.3.sql b/sql/core/src/test/resources/ssb/3.3.sql
new file mode 100644
index 0000000..f2cb44f
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/3.3.sql
@@ -0,0 +1,12 @@
+select c_city, s_city, d_year, sum(lo_revenue) as revenue
+	from customer, lineorder, supplier, date
+	where lo_custkey = c_custkey
+		and lo_suppkey = s_suppkey
+		and lo_orderdate = d_datekey
+		and c_nation = 'UNITED KINGDOM'
+		and (c_city='UNITED KI1' or c_city='UNITED KI5')
+		and (s_city='UNITED KI1' or s_city='UNITED KI5')
+		and s_nation = 'UNITED KINGDOM'
+		and d_year >= 1992 and d_year <= 1997
+	group by c_city, s_city, d_year
+	order by d_year asc, revenue desc

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/3.4.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/3.4.sql b/sql/core/src/test/resources/ssb/3.4.sql
new file mode 100644
index 0000000..936f246
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/3.4.sql
@@ -0,0 +1,12 @@
+select c_city, s_city, d_year, sum(lo_revenue) as revenue
+	from customer, lineorder, supplier, date
+	where lo_custkey = c_custkey
+		and lo_suppkey = s_suppkey
+		and lo_orderdate = d_datekey
+		and c_nation = 'UNITED KINGDOM'
+		and (c_city='UNITED KI1' or c_city='UNITED KI5')
+		and (s_city='UNITED KI1' or s_city='UNITED KI5')
+		and s_nation = 'UNITED KINGDOM'
+		and d_yearmonth = 'Dec1997'
+	group by c_city, s_city, d_year
+	order by d_year asc, revenue desc

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/4.1.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/4.1.sql b/sql/core/src/test/resources/ssb/4.1.sql
new file mode 100644
index 0000000..af19ecd
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/4.1.sql
@@ -0,0 +1,11 @@
+select d_year, c_nation, sum(lo_revenue-lo_supplycost) as profit1
+	from date, customer, supplier, part, lineorder
+	where lo_custkey = c_custkey
+		and lo_suppkey = s_suppkey
+		and lo_partkey = p_partkey
+		and lo_orderdate = d_datekey
+		and c_region = 'AMERICA'
+		and s_region = 'AMERICA'
+		and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+	group by d_year, c_nation
+	order by d_year, c_nation

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/4.2.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/4.2.sql b/sql/core/src/test/resources/ssb/4.2.sql
new file mode 100644
index 0000000..2ffe6e2
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/4.2.sql
@@ -0,0 +1,12 @@
+select d_year, s_nation, p_category, sum(lo_revenue-lo_supplycost) as profit1
+	from date, customer, supplier, part, lineorder
+	where lo_custkey = c_custkey
+		and lo_suppkey = s_suppkey
+		and lo_partkey = p_partkey
+		and lo_orderdate = d_datekey
+		and c_region = 'AMERICA'
+		and s_region = 'AMERICA'
+		and (d_year = 1997 or d_year = 1998)
+		and (p_mfgr = 'MFGR#1' or p_mfgr = 'MFGR#2')
+	group by d_year, s_nation, p_category
+	order by d_year, s_nation, p_category

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/resources/ssb/4.3.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/ssb/4.3.sql b/sql/core/src/test/resources/ssb/4.3.sql
new file mode 100644
index 0000000..e24e2cc
--- /dev/null
+++ b/sql/core/src/test/resources/ssb/4.3.sql
@@ -0,0 +1,12 @@
+select d_year, s_city, p_brand1, sum(lo_revenue-lo_supplycost) as profit1
+	from date, customer, supplier, part, lineorder
+	where lo_custkey = c_custkey
+		and lo_suppkey = s_suppkey
+		and lo_partkey = p_partkey
+		and lo_orderdate = d_datekey
+		and c_region = 'AMERICA'
+		and s_nation = 'UNITED STATES'
+		and (d_year = 1997 or d_year = 1998)
+		and p_category = 'MFGR#14'
+	group by d_year, s_city, p_brand1
+	order by d_year, s_city, p_brand1

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala
new file mode 100644
index 0000000..9a0c61b
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.util.resourceToString
+
+/**
+ * This test suite ensures all the Star Schema Benchmark queries can be successfully analyzed,
+ * optimized and compiled without hitting the max iteration threshold.
+ */
+class SSBQuerySuite extends BenchmarkQueryTest {
+
+  override def beforeAll {
+    super.beforeAll
+
+    sql(
+      """
+        |CREATE TABLE `part` (`p_partkey` INT, `p_name` STRING, `p_mfgr` STRING,
+        |`p_category` STRING, `p_brand1` STRING, `p_color` STRING, `p_type` STRING, `p_size`
INT,
+        |`p_container` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `supplier` (`s_suppkey` INT, `s_name` STRING, `s_address` STRING,
+        |`s_city` STRING, `s_nation` STRING, `s_region` STRING, `s_phone` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `customer` (`c_custkey` INT, `c_name` STRING, `c_address` STRING,
+        |`c_city` STRING, `c_nation` STRING, `c_region` STRING, `c_phone` STRING,
+        |`c_mktsegment` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `date` (`d_datekey` INT, `d_date` STRING, `d_dayofweek` STRING,
+        |`d_month` STRING, `d_year` INT, `d_yearmonthnum` INT, `d_yearmonth` STRING,
+        |`d_daynuminweek` INT, `d_daynuminmonth` INT, `d_daynuminyear` INT, `d_monthnuminyear`
INT,
+        |`d_weeknuminyear` INT, `d_sellingseason` STRING, `d_lastdayinweekfl` STRING,
+        |`d_lastdayinmonthfl` STRING, `d_holidayfl` STRING, `d_weekdayfl` STRING)
+        |USING parquet
+      """.stripMargin)
+
+    sql(
+      """
+        |CREATE TABLE `lineorder` (`lo_orderkey` INT, `lo_linenumber` INT, `lo_custkey` INT,
+        |`lo_partkey` INT, `lo_suppkey` INT, `lo_orderdate` INT, `lo_orderpriority` STRING,
+        |`lo_shippriority` STRING, `lo_quantity` INT, `lo_extendedprice` INT,
+        |`lo_ordertotalprice` INT, `lo_discount` INT, `lo_revenue` INT, `lo_supplycost` INT,
+        |`lo_tax` INT, `lo_commitdate` INT, `lo_shipmode` STRING)
+        |USING parquet
+      """.stripMargin)
+  }
+
+  val ssbQueries = Seq(
+    "1.1", "1.2", "1.3", "2.1", "2.2", "2.3", "3.1", "3.2", "3.3", "3.4", "4.1", "4.2", "4.3")
+
+  ssbQueries.foreach { name =>
+    val queryString = resourceToString(s"ssb/$name.sql",
+      classLoader = Thread.currentThread.getContextClassLoader)
+    test(name) {
+      // check the plans can be properly generated
+      val plan = sql(queryString).queryExecution.executedPlan
+      checkGeneratedCode(plan)
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/9fafa820/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala
index 69ac92e..e3e7005 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala
@@ -17,9 +17,7 @@
 
 package org.apache.spark.sql
 
-import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.util.resourceToString
-import org.apache.spark.util.Utils
 
 /**
  * This test suite ensures all the TPC-H queries can be successfully analyzed, optimized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message