spark-commits mailing list archives

From lix...@apache.org
Subject spark git commit: [SPARK-22280][SQL][TEST] Improve StatisticsSuite to test `convertMetastore` properly
Date Mon, 16 Oct 2017 23:16:37 GMT
Repository: spark
Updated Branches:
  refs/heads/master 561505e2f -> c09a2a76b


[SPARK-22280][SQL][TEST] Improve StatisticsSuite to test `convertMetastore` properly

## What changes were proposed in this pull request?

This PR aims to improve **StatisticsSuite** to test the `convertMetastore` configuration properly.
Currently, some of the test logic in `test statistics of LogicalRelation converted from Hive serde
tables` depends on the default configuration. The new test case is shorter and covers both cases
(true/false) explicitly.

This test case was previously modified by SPARK-17410 and SPARK-17284 in Spark 2.3.0.
- https://github.com/apache/spark/commit/a2460be9c30b67b9159fe339d115b84d53cc288a#diff-1c464c86b68c2d0b07e73b7354e74ce7R443
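
Below is a minimal sketch of the pattern the new test adopts: pin both values of the configuration
explicitly with the suite's `withSQLConf` helper (which restores the previous value when the block
exits) instead of relying on the session default. The identifiers come from the test suite in the
diff below; the body is only an outline, not the suite code itself.

```scala
// Sketch only: run the same assertions under both values of the conf rather than
// assuming the session default. `withSQLConf` restores the previous setting when
// the block exits, so the two iterations do not interfere with each other.
Seq(true, false).foreach { convertMetastore =>
  withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> convertMetastore.toString) {
    // create and populate a Hive Parquet table, run
    // ANALYZE TABLE ... COMPUTE STATISTICS, and assert the statistics
    // expected for this particular setting (see the updated test case below)
  }
}
```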

## How was this patch tested?

Passed Jenkins with the improved test case.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #19500 from dongjoon-hyun/SPARK-22280.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c09a2a76
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c09a2a76
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c09a2a76

Branch: refs/heads/master
Commit: c09a2a76b52905a784d2767cb899dc886c330628
Parents: 561505e
Author: Dongjoon Hyun <dongjoon@apache.org>
Authored: Mon Oct 16 16:16:34 2017 -0700
Committer: gatorsmile <gatorsmile@gmail.com>
Committed: Mon Oct 16 16:16:34 2017 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/hive/StatisticsSuite.scala | 34 ++++++++------------
 1 file changed, 14 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c09a2a76/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 9ff9ecf..b9a5ad7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -937,26 +937,20 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto
   }
 
   test("test statistics of LogicalRelation converted from Hive serde tables") {
-    val parquetTable = "parquetTable"
-    val orcTable = "orcTable"
-    withTable(parquetTable, orcTable) {
-      sql(s"CREATE TABLE $parquetTable (key STRING, value STRING) STORED AS PARQUET")
-      sql(s"CREATE TABLE $orcTable (key STRING, value STRING) STORED AS ORC")
-      sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src")
-      sql(s"INSERT INTO TABLE $orcTable SELECT * FROM src")
-
-      // the default value for `spark.sql.hive.convertMetastoreParquet` is true, here we just set it
-      // for robustness
-      withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") {
-        checkTableStats(parquetTable, hasSizeInBytes = false, expectedRowCounts = None)
-        sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS")
-        checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
-      }
-      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") {
-        // We still can get tableSize from Hive before Analyze
-        checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = None)
-        sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS")
-        checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = Some(500))
+    Seq("orc", "parquet").foreach { format =>
+      Seq(true, false).foreach { isConverted =>
+        withSQLConf(
+          HiveUtils.CONVERT_METASTORE_ORC.key -> s"$isConverted",
+          HiveUtils.CONVERT_METASTORE_PARQUET.key -> s"$isConverted") {
+          withTable(format) {
+            sql(s"CREATE TABLE $format (key STRING, value STRING) STORED AS $format")
+            sql(s"INSERT INTO TABLE $format SELECT * FROM src")
+
+            checkTableStats(format, hasSizeInBytes = !isConverted, expectedRowCounts = None)
+            sql(s"ANALYZE TABLE $format COMPUTE STATISTICS")
+            checkTableStats(format, hasSizeInBytes = true, expectedRowCounts = Some(500))
+          }
+        }
       }
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

