spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rxin <...@git.apache.org>
Subject [GitHub] spark pull request #21416: [SPARK-24371] [SQL] Added isInCollection in DataF...
Date Tue, 29 May 2018 05:24:45 GMT
Github user rxin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21416#discussion_r191306678
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala ---
    @@ -392,9 +396,97 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
     
         val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
     
    -    intercept[AnalysisException] {
    +    val e = intercept[AnalysisException] {
           df2.filter($"a".isin($"b"))
         }
    +    Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
    +      .foreach { s =>
    +        assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
    +      }
    +  }
    +
    +  test("isInCollection: Scala Collection") {
    +    val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
    +    checkAnswer(df.filter($"a".isInCollection(Seq(1, 2))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 2))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
    +    // Auto casting should work with mixture of different types in collections
    +    checkAnswer(df.filter($"a".isInCollection(Seq(1.toShort, "2"))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq("3", 2.toLong))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, "1"))),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
    +    checkAnswer(df.filter($"b".isInCollection(Seq("y", "x"))),
    +      df.collect().toSeq.filter(r => r.getString(1) == "y" || r.getString(1) == "x"))
    +    checkAnswer(df.filter($"b".isInCollection(Seq("z", "x"))),
    +      df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "x"))
    +    checkAnswer(df.filter($"b".isInCollection(Seq("z", "y"))),
    +      df.collect().toSeq.filter(r => r.getString(1) == "z" || r.getString(1) == "y"))
    +
    +    // Test with different types of collections
    +    checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2))
    +    checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)),
    +      df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1))
    +
    +    val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b")
    +
    +    val e = intercept[AnalysisException] {
    +      df2.filter($"a".isInCollection(Seq($"b")))
    +    }
    +    Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were")
    +      .foreach { s =>
    +        assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
    +      }
    +  }
    +
    +  test("isInCollection: Java Collection") {
    +    val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b")
    --- End diff --
    
    same thing here. just run a single test case.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message