spark-reviews mailing list archives

From davies <...@git.apache.org>
Subject [GitHub] spark pull request #14266: [SPARK-16526][SQL] Benchmarking Performance for F...
Date Thu, 04 Aug 2016 22:49:23 GMT
Github user davies commented on a diff in the pull request:

    https://github.com/apache/spark/pull/14266#discussion_r73616369
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala
---
    @@ -576,4 +576,605 @@ class AggregateBenchmark extends BenchmarkBase {
         benchmark.run()
       }
     
    -}
    +  // This test does not run any benchmark; instead, it prints the generated code for the
    +  // vectorized and row-based hashmaps.
    +  ignore("generated code comparison for vectorized vs. rowbased") {
    +    val N = 20 << 23
    +
    +    sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
    +    sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")
    +
    +    sparkSession.range(N)
    +      .selectExpr(
    +        "id & 1023 as k1",
    +        "cast (id & 1023 as string) as k2")
    +      .createOrReplaceTempView("test")
    +
    +    // dataframe/query
    +    val query = sparkSession.sql("select count(k1), sum(k1) from test group by k1, k2")
    +
    +    // vectorized
    +    sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", "vectorized")
    +    query.queryExecution.debug.codegen()
    +
    +    // row based
    +    sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", "rowbased")
    +    query.queryExecution.debug.codegen()
    +  }
    +
    +  ignore("1 key field, 1 value field, distinct linear keys") {
    +    val N = 20 << 22
    +
    +    var timeStart: Long = 0L
    +    var timeEnd: Long = 0L
    +    var nsPerRow: Long = 0L
    +    var i = 0
    +    sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
    +    sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")
    +
    +    // scalastyle:off
    +    println(Benchmark.getJVMOSInfo())
    +    println(Benchmark.getProcessorName())
    +    printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    +      "Vectorized", "Row-based")
    +    // scalastyle:on
    +
    +    val modes = List("skip", "vectorized", "rowbased")
    +
    +    while (i < 15) {
    +      val results = modes.map(mode => {
    +        sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
    +        var j = 0
    +        var minTime: Long = 1000
    +        while (j < 5) {
    +          System.gc()
    +          sparkSession.range(N)
    +            .selectExpr(
    +              "id & " + ((1 << i) - 1) + " as k0")
    +            .createOrReplaceTempView("test")
    +          timeStart = System.nanoTime
    +          sparkSession.sql("select sum(k0)" +
    +            " from test group by k0").collect()
    +          timeEnd = System.nanoTime
    +          nsPerRow = (timeEnd - timeStart) / N
    +          if (j > 1 && minTime > nsPerRow) minTime = nsPerRow
    +          j += 1
    +        }
    +        minTime
    +      })
    +      printf("%20s %20s %20s %20s\n", 1 << i, results(0), results(1), results(2))
    +      i += 1
    +    }
    +    printf("Unit: ns/row\n")
    +
    +    /*
    +    Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
    +    Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
    +
    +      Num. Distinct Keys      No Fast Hashmap           Vectorized            Row-based
    +                       1                   21                   13                   11
    +                       2                   23                   14                   13
    +                       4                   23                   14                   14
    +                       8                   23                   14                   14
    +                      16                   23                   12                   13
    +                      32                   24                   12                   13
    +                      64                   24                   14                   16
    +                     128                   24                   14                   13
    +                     256                   25                   14                   14
    +                     512                   25                   16                   14
    +                    1024                   25                   16                   15
    +                    2048                   26                   12                   15
    +                    4096                   27                   15                   15
    +                    8192                   33                   16                   15
    +                   16384                   34                   15                   15
    +    Unit: ns/row
    +    */
    +  }
    +
    +  ignore("1 key field, 1 value field, distinct random keys") {
    +    val N = 20 << 22
    +
    +    var timeStart: Long = 0L
    +    var timeEnd: Long = 0L
    +    var nsPerRow: Long = 0L
    +    var i = 0
    +    sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
    +    sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")
    +
    +    // scalastyle:off
    +    println(Benchmark.getJVMOSInfo())
    +    println(Benchmark.getProcessorName())
    +    printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    +      "Vectorized", "Row-based")
    +    // scalastyle:on
    +
    +    val modes = List("skip", "vectorized", "rowbased")
    +
    +    while (i < 15) {
    +      val results = modes.map(mode => {
    +        sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
    +        var j = 0
    +        var minTime: Long = 1000
    +        while (j < 5) {
    +          System.gc()
    +          sparkSession.range(N)
    +            .selectExpr(
    +              "cast(floor(rand() * " + (1 << i) + ") as long) as k0")
    +            .createOrReplaceTempView("test")
    +          timeStart = System.nanoTime
    +          sparkSession.sql("select sum(k0)" +
    +            " from test group by k0").collect()
    +          timeEnd = System.nanoTime
    +          nsPerRow = (timeEnd - timeStart) / N
    +          if (j > 1 && minTime > nsPerRow) minTime = nsPerRow
    +          j += 1
    +        }
    +        minTime
    +      })
    +      printf("%20s %20s %20s %20s\n", 1 << i, results(0), results(1), results(2))
    +      i += 1
    +    }
    +    printf("Unit: ns/row\n")
    +
    +    /*
    +    Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
    +    Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
    +
    +      Num. Distinct Keys      No Fast Hashmap           Vectorized            Row-based
    +                       1                   32                    9                   13
    +                       2                   39                   16                   22
    +                       4                   39                   14                   23
    +                       8                   39                   13                   22
    +                      16                   38                   13                   20
    +                      32                   38                   13                   20
    +                      64                   38                   13                   20
    +                     128                   37                   16                   21
    +                     256                   36                   17                   22
    +                     512                   38                   17                   21
    +                    1024                   39                   18                   21
    +                    2048                   41                   18                   21
    +                    4096                   44                   18                   22
    +                    8192                   49                   20                   23
    +                   16384                   52                   23                   25
    +    Unit: ns/row
    +    */
    +  }
    +
    +  ignore("1 key field, varying value fields, 16 linear distinct keys") {
    +    val N = 20 << 22
    +
    +    var timeStart: Long = 0L
    +    var timeEnd: Long = 0L
    +    var nsPerRow: Long = 0L
    +    var i = 1
    +    sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
    +    sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")
    +
    +    // scalastyle:off
    +    println(Benchmark.getJVMOSInfo())
    +    println(Benchmark.getProcessorName())
    +    printf("%20s %20s %20s %20s\n", "Num. Value Fields", "No Fast Hashmap",
    +      "Vectorized", "Row-based")
    +    // scalastyle:on
    +
    +    val modes = List("skip", "vectorized", "rowbased")
    +
    +    while (i < 11) {
    +      val results = modes.map(mode => {
    +        sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
    +        var j = 0
    +        var minTime: Long = 1000
    +        while (j < 5) {
    +          System.gc()
    +          sparkSession.range(N)
    +            .selectExpr("id & " + 15  + " as k0")
    +            .createOrReplaceTempView("test")
    +          timeStart = System.nanoTime
    +          sparkSession.sql("select " + List.range(0, i).map(x => "sum(k" + 0 + ")").mkString(",")
+
    +            " from test group by k0").collect()
    +          timeEnd = System.nanoTime
    +          nsPerRow = (timeEnd - timeStart) / N
    +          if (j > 1 && minTime > nsPerRow) minTime = nsPerRow
    +          j += 1
    +        }
    +        minTime
    +      })
    +      printf("%20s %20s %20s %20s\n", i, results(0), results(1), results(2))
    +      i += 1
    +    }
    +    printf("Unit: ns/row\n")
    +
    +    /*
    +    Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
    +    Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
    +
    +       Num. Value Fields      No Fast Hashmap           Vectorized            Row-based
    +                       1                   24                   15                   12
    +                       2                   25                   24                   14
    +                       3                   29                   25                   17
    +                       4                   31                   32                   22
    +                       5                   33                   40                   24
    +                       6                   36                   36                   27
    +                       7                   38                   44                   28
    +                       8                   47                   50                   32
    +                       9                   52                   55                   37
    +                      10                   59                   59                   45
    +    Unit: ns/row
    +    */
    +  }
    +
    +  ignore("varying key fields, 1 value field, 16 linear distinct keys") {
    +    val N = 20 << 22
    +
    +    var timeStart: Long = 0L
    +    var timeEnd: Long = 0L
    +    var nsPerRow: Long = 0L
    +    var i = 1
    +    sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
    +    sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")
    +
    +    // scalastyle:off
    +    println(Benchmark.getJVMOSInfo())
    +    println(Benchmark.getProcessorName())
    +    printf("%20s %20s %20s %20s\n", "Num. Key Fields", "No Fast Hashmap",
    +      "Vectorized", "Row-based")
    +    // scalastyle:on
    +
    +    val modes = List("skip", "vectorized", "rowbased")
    +
    +    while (i < 11) {
    +      val results = modes.map(mode => {
    +        sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
    +        var j = 0
    +        var minTime: Long = 1000
    +        while (j < 5) {
    +          System.gc()
    +          val s = "id & " + 15 + " as k"
    +          sparkSession.range(N)
    +            .selectExpr(List.range(0, i).map(x => s + x): _*)
    +            .createOrReplaceTempView("test")
    +          timeStart = System.nanoTime
    +          sparkSession.sql("select sum(k0)" +
    +            " from test group by " + List.range(0, i).map(x => "k" + x).mkString(",")).collect()
    +          timeEnd = System.nanoTime
    +          nsPerRow = (timeEnd - timeStart) / N
    +          if (j > 1 && minTime > nsPerRow) minTime = nsPerRow
    +          j += 1
    +        }
    +        minTime
    +      })
    +      printf("%20s %20s %20s %20s\n", i, results(0), results(1), results(2))
    +      i += 1
    +    }
    +    printf("Unit: ns/row\n")
    +
    +    /*
    +    Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
    +    Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
    +
    +         Num. Key Fields      No Fast Hashmap           Vectorized            Row-based
    +                       1                   24                   15                   13
    +                       2                   31                   20                   14
    +                       3                   37                   22                   17
    +                       4                   46                   26                   18
    +                       5                   53                   27                   20
    +                       6                   61                   29                   23
    +                       7                   69                   36                   25
    +                       8                   78                   37                   27
    +                       9                   88                   43                   30
    +                      10                   92                   45                   33
    +    Unit: ns/row
    +    */
    +  }
    +
    +  ignore("varying key fields, varying value field, 16 linear distinct keys") {
    --- End diff --
    
    The performance difference between the column-based and row-based maps comes down to
    cache locality. Could you increase the number of distinct keys to make sure that not all
    the keys/values fit in the L1 cache? For example, 4K. We could also increase that to 64K
    in the first two cases (single key, single value).
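    
    For concreteness, a quick back-of-the-envelope: with 8-byte long keys and 8-byte
    aggregation buffers, 4K distinct keys already need about 4096 * 16 B = 64 KB of live
    hashmap data, more than a typical 32 KB L1 data cache. Below is a minimal, self-contained
    sketch of the kind of run the suggestion implies; it is an illustration, not part of the
    patch. The object name, the local-mode session setup, and the 4095 mask are all
    hypothetical, and the spark.sql.codegen.aggregate.map.enforce.impl knob only exists with
    this PR applied.
    
        import org.apache.spark.sql.SparkSession
    
        object FastHashMapL1Probe {
          def main(args: Array[String]): Unit = {
            // Hypothetical local session; the benchmark in the diff uses the test
            // harness's sparkSession instead.
            val spark = SparkSession.builder()
              .master("local[1]")
              .appName("agg-hashmap-l1-probe")
              .getOrCreate()
    
            val N = 20 << 22
            spark.conf.set("spark.sql.codegen.wholeStage", "true")
            // Only meaningful with this PR applied; pick "vectorized" or "rowbased".
            spark.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", "vectorized")
    
            // 4095 mask => 4K distinct keys, so keys+values overflow a 32 KB L1 cache.
            spark.range(N)
              .selectExpr("id & 4095 as k0")
              .createOrReplaceTempView("test")
    
            // Same min-over-runs timing scheme as the diff, treating the first two
            // runs as warmup.
            var minNsPerRow = Long.MaxValue
            for (j <- 0 until 5) {
              System.gc()
              val start = System.nanoTime
              spark.sql("select sum(k0) from test group by k0").collect()
              val nsPerRow = (System.nanoTime - start) / N
              if (j > 1) minNsPerRow = math.min(minNsPerRow, nsPerRow)
            }
            println(s"4K distinct keys: $minNsPerRow ns/row")
            spark.stop()
          }
        }
    
    For the first two cases, the same idea is just raising the loop bound, e.g. while (i < 17)
    instead of while (i < 15), so the sweep reaches 64K (1 << 16) distinct keys.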


