spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cloud-fan <...@git.apache.org>
Subject [GitHub] spark pull request #19714: [SPARK-22489][SQL] Shouldn't change broadcast joi...
Date Wed, 29 Nov 2017 12:18:17 GMT
Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19714#discussion_r153771297
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala ---
    @@ -223,4 +223,69 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils {
         assert(HashJoin.rewriteKeyExpr(l :: ss :: Nil) === l :: ss :: Nil)
         assert(HashJoin.rewriteKeyExpr(i :: ss :: Nil) === i :: ss :: Nil)
       }
    +
    +  test("Shouldn't change broadcast join buildSide if user clearly specified") {
    +    def assertJoinBuildSide(pair: (String, String, BuildSide)): Any = {
    +      val (sqlString, joinMethod, buildSide) = pair
    +      val executedPlan = sql(sqlString).queryExecution.executedPlan
    +      executedPlan match {
    +        case b: BroadcastNestedLoopJoinExec =>
    +          assert(b.getClass.getSimpleName === joinMethod)
    +          assert(b.buildSide === buildSide)
    +        case w: WholeStageCodegenExec =>
    +          assert(w.children.head.getClass.getSimpleName === joinMethod)
    +          assert(w.children.head.asInstanceOf[BroadcastHashJoinExec].buildSide === buildSide)
    +      }
    +    }
    +
    +    withTempView("t1", "t2") {
    +      spark.createDataFrame(Seq((1, "4"), (2, "2"))).toDF("key", "value").createTempView("t1")
    +      spark.createDataFrame(Seq((1, "1"), (2, "12.3"), (2, "123"))).toDF("key", "value")
    +        .createTempView("t2")
    +
    +      val t1Size = spark.table("t1").queryExecution.analyzed.children.head.stats.sizeInBytes
    +      val t2Size = spark.table("t2").queryExecution.analyzed.children.head.stats.sizeInBytes
    +      assert(t1Size < t2Size)
    +
    +      val bh = BroadcastHashJoinExec.toString
    +      val bl = BroadcastNestedLoopJoinExec.toString
    +
    +      Seq(
    +        // INNER JOIN && t1Size < t2Size => BuildLeft
    +        ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildLeft),
    +        // LEFT JOIN => BuildRight
    +        ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 LEFT JOIN t2 ON t1.key = t2.key", bh, BuildRight),
    +        // RIGHT JOIN => BuildLeft
    +        ("SELECT /*+ MAPJOIN(t1, t2) */ * FROM t1 RIGHT JOIN t2 ON t1.key = t2.key", bh, BuildLeft),
    +        // INNER JOIN && broadcast(t1) => BuildLeft
    +        ("SELECT /*+ MAPJOIN(t1) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildLeft),
    +        // INNER JOIN && broadcast(t2) => BuildRight
    +        ("SELECT /*+ MAPJOIN(t2) */ * FROM t1 JOIN t2 ON t1.key = t2.key", bh, BuildRight)
    +      ).foreach(assertJoinBuildSide)
    --- End diff --
    
    I think it's more readable to write
    ```
    assertJoinBuildSide(...)
    assertJoinBuildSide(...)
    ...
    ```
    than
    ```
    Seq(
      ...
    ).foreach(assertJoinBuildSide)
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message