spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject spark git commit: [SPARK-16238] Metrics for generated method and class bytecode size
Date Wed, 29 Jun 2016 22:07:41 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 ef0253ff6 -> c4cebd572


[SPARK-16238] Metrics for generated method and class bytecode size

## What changes were proposed in this pull request?

This extends SPARK-15860 to include metrics for the actual bytecode size of janino-generated
methods. They can be accessed in the same way as any other codahale metric, e.g.

```
scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot().getValues()
res7: Array[Long] = Array(532, 532, 532, 542, 1479, 2670, 3585, 3585)

scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot().getValues()
res8: Array[Long] = Array(5, 5, 5, 5, 10, 10, 10, 10, 15, 15, 15, 38, 63, 79, 88, 94, 94,
94, 132, 132, 165, 165, 220, 220)
```

## How was this patch tested?

Small unit test; also verified manually that the performance impact is minimal (<10%).
hvanhovell

Author: Eric Liang <ekl@databricks.com>

Closes #13934 from ericl/spark-16238.

(cherry picked from commit 23c58653f900bfb71ef2b3186a95ad2562c33969)
Signed-off-by: Reynold Xin <rxin@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4cebd57
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4cebd57
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4cebd57

Branch: refs/heads/branch-2.0
Commit: c4cebd5725e6d8ade8c0a02652e251d04903da72
Parents: ef0253f
Author: Eric Liang <ekl@databricks.com>
Authored: Wed Jun 29 15:07:32 2016 -0700
Committer: Reynold Xin <rxin@databricks.com>
Committed: Wed Jun 29 15:07:38 2016 -0700

----------------------------------------------------------------------
 .../spark/metrics/source/StaticSources.scala    | 12 ++++++
 .../expressions/codegen/CodeGenerator.scala     | 40 +++++++++++++++++++-
 .../expressions/CodeGenerationSuite.scala       |  4 ++
 3 files changed, 55 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c4cebd57/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala
index 6819222..6bba259 100644
--- a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala
+++ b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala
@@ -47,4 +47,16 @@ object CodegenMetrics extends Source {
    * Histogram of the time it took to compile source code text (in milliseconds).
    */
   val METRIC_COMPILATION_TIME = metricRegistry.histogram(MetricRegistry.name("compilationTime"))
+
+  /**
+   * Histogram of the bytecode size of each class generated by CodeGenerator.
+   */
+  val METRIC_GENERATED_CLASS_BYTECODE_SIZE =
+    metricRegistry.histogram(MetricRegistry.name("generatedClassSize"))
+
+  /**
+   * Histogram of the bytecode size of each method in classes generated by CodeGenerator.
+   */
+  val METRIC_GENERATED_METHOD_BYTECODE_SIZE =
+    metricRegistry.histogram(MetricRegistry.name("generatedMethodSize"))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/c4cebd57/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 6392ff4..16fb1f6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -17,11 +17,16 @@
 
 package org.apache.spark.sql.catalyst.expressions.codegen
 
+import java.io.ByteArrayInputStream
+import java.util.{Map => JavaMap}
+
+import scala.collection.JavaConverters._
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 
 import com.google.common.cache.{CacheBuilder, CacheLoader}
-import org.codehaus.janino.ClassBodyEvaluator
+import org.codehaus.janino.{ByteArrayClassLoader, ClassBodyEvaluator, SimpleCompiler}
+import org.codehaus.janino.util.ClassFile
 import scala.language.existentials
 
 import org.apache.spark.SparkEnv
@@ -876,6 +881,7 @@ object CodeGenerator extends Logging {
 
     try {
       evaluator.cook("generated.java", code.body)
+      recordCompilationStats(evaluator)
     } catch {
       case e: Exception =>
         val msg = s"failed to compile: $e\n$formatted"
@@ -886,6 +892,38 @@ object CodeGenerator extends Logging {
   }
 
   /**
+   * Records generated class and method bytecode sizes by reflecting on janino private fields.
+   */
+  private def recordCompilationStats(evaluator: ClassBodyEvaluator): Unit = {
+    // Read SimpleCompiler's private "result" loader, then its "classes" map, via reflection.
+    val classes = {
+      val resultField = classOf[SimpleCompiler].getDeclaredField("result")
+      resultField.setAccessible(true)
+      val loader = resultField.get(evaluator).asInstanceOf[ByteArrayClassLoader]
+      val classesField = loader.getClass.getDeclaredField("classes")
+      classesField.setAccessible(true)
+      classesField.get(loader).asInstanceOf[JavaMap[String, Array[Byte]]].asScala
+    }
+
+    // Parse each class file; record its size and the length of each CodeAttribute.code array.
+    val codeAttr = Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute")
+    val codeAttrField = codeAttr.getDeclaredField("code")
+    codeAttrField.setAccessible(true)
+    classes.foreach { case (_, classBytes) =>
+      CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classBytes.length)
+      val cf = new ClassFile(new ByteArrayInputStream(classBytes))
+      cf.methodInfos.asScala.foreach { method =>
+        method.getAttributes().foreach { a =>
+          if (a.getClass.getName == codeAttr.getName) {
+            CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(
+              codeAttrField.get(a).asInstanceOf[Array[Byte]].length)
+          }
+        }
+      }
+    }
+  }
+
+  /**
    * A cache of generated classes.
    *
    * From the Guava Docs: A Cache is similar to ConcurrentMap, but not quite the same. The most

http://git-wip-us.apache.org/repos/asf/spark/blob/c4cebd57/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
index 60dd03f..8ea8f61 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
@@ -53,9 +53,13 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("metrics are recorded on compile") {
     val startCount1 = CodegenMetrics.METRIC_COMPILATION_TIME.getCount()
     val startCount2 = CodegenMetrics.METRIC_SOURCE_CODE_SIZE.getCount()
+    val startCount3 = CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getCount()
+    val startCount4 = CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount()
     GenerateOrdering.generate(Add(Literal(123), Literal(1)).asc :: Nil)
     assert(CodegenMetrics.METRIC_COMPILATION_TIME.getCount() == startCount1 + 1)
     assert(CodegenMetrics.METRIC_SOURCE_CODE_SIZE.getCount() == startCount2 + 1)
+    assert(CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getCount() > startCount3)
+    assert(CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getCount() > startCount4)
   }
 
   test("SPARK-8443: split wide projections into blocks due to JVM code size limit") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message