daffodil-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From slawre...@apache.org
Subject [incubator-daffodil] branch master updated: Fix deep stack sizes when serializing some schemas
Date Mon, 09 Mar 2020 16:48:07 GMT
This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git


The following commit(s) were added to refs/heads/master by this push:
     new 1267898  Fix deep stack sizes when serializing some schemas
1267898 is described below

commit 126789891ed5e834037198e5aa59e1f6b34dcb04
Author: Steve Lawrence <slawrence@apache.org>
AuthorDate: Mon Mar 9 09:19:46 2020 -0400

    Fix deep stack sizes when serializing some schemas
    
    The "parents" val in a DPathCompileInfo is a backpointer to all
    DPathCompileInfo's that reference it. The problem with this is that when
    elements are shared, these backpointers create a highly connected graph
    that requires a large stack to serialize using the default java
    serialization as it jumps around parents and children. To avoid this
    large stack requirement, we make the parents backpointer transient. This
    prevents jumping back up to parents during serialization and results in
    only needing a stack depth relative to the schema depth. Once all that
    serialization is completed and all the DPathCompileInfo's are
    serialized, we then manually traverse all the DPathCompileInfo's again
    and serialize the parent sequences (via the serializeParents method).
    Because all the DPathCompileInfo's are already serialized, this just
    serializes the Sequence objects and the stack depth is again relative to
    the schema depth.
    
    On complex schemas, this saw an order of magnitude reduction in stack
    size during serialization.
    
    DAFFODIL-2283
---
 .../scala/org/apache/daffodil/util/Serialize.scala |  1 -
 .../apache/daffodil/dsom/CompiledExpression1.scala | 54 +++++++++++++++++++++-
 .../daffodil/processors/SchemaSetRuntimeData.scala | 10 ++++
 3 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala
index 1a94fa8..20bb92e 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala
@@ -41,7 +41,6 @@ trait PreSerialization extends Serializable {
 
   protected final def serializeObject(out: java.io.ObjectOutputStream) {
     try {
-      // println("serializing " + Misc.getNameFromClass(this)) // good place for a breakpoint
       preSerialization
       out.defaultWriteObject()
     } catch {
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
index da9c5a4..6a6c985 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
@@ -212,7 +212,47 @@ class DPathCompileInfo(
   extends ImplementsThrowsSDE with PreSerialization
   with HasSchemaFileLocation {
 
-  lazy val parents = parentsArg
+
+  /**
+   * This "parents" val is a backpointer to all DPathCompileInfo's that
+   * reference this DPathCompileInfo. The problem with this is that when
+   * elements are shared, these backpointers create a highly connected graph
+   * that requires a large stack to serialize using the default java
+   * serialization as it jumps around parents and children. To avoid this large
+   * stack requirement, we make the parents backpointer transient. This
+   * prevents jumping back up to parents during serialization and results in
+   * only needing a stack depth relative to the schema depth. Once all that
+   * serialization is completed and all the DPathCompileInfo's are serialized,
+   * we then manually traverse all the DPathCompileInfo's again and serialize
+   * the parent sequences (via the serializeParents method). Because all the
+   * DPathCompileInfo's are already serialized, this just serializes the
+   * Sequence objects and the stack depth is again relative to the schema
+   * depth.
+   */
+  @transient
+  val parents = parentsArg
+
+  def serializeParents(oos: java.io.ObjectOutputStream): Unit = {
+    oos.writeObject(parents)
+  }
+
+  def deserializeParents(ois: java.io.ObjectInputStream): Unit = {
+    val deserializedParents = ois.readObject().asInstanceOf[Seq[DPathCompileInfo]]
+
+    // Set the parents field via reflection so that it can be a val rather than a var
+    val clazz = this.getClass
+    val parentsField = try {
+      clazz.getDeclaredField("parents")
+    } catch {
+      case e: java.lang.NoSuchFieldException =>
+        clazz.getSuperclass.getDeclaredField("parents")
+    }
+    parentsField.setAccessible(true)
+    parentsField.set(this, deserializedParents) // set the value to the deserialized value
+    parentsField.setAccessible(false)
+  }
+
+
   lazy val variableMap =
     variableMapArg
 
@@ -222,7 +262,6 @@ class DPathCompileInfo(
   lazy val typeCalcMap: TypeCalcMap = typeCalcMapArg.map(identity)
 
   override def preSerialization: Any = {
-    parents
     variableMap
   }
 
@@ -301,6 +340,17 @@ class DPathElementCompileInfo(
     unqualifiedPathStepPolicy,
     typeCalcMap, lexicalContextRuntimeData) {
 
+  override def serializeParents(oos: java.io.ObjectOutputStream): Unit = {
+    super.serializeParents(oos)
+    elementChildrenCompileInfo.foreach { _.serializeParents(oos) }
+  }
+
+  override def deserializeParents(ois: java.io.ObjectInputStream): Unit = {
+    super.deserializeParents(ois)
+    elementChildrenCompileInfo.foreach { _.deserializeParents(ois) }
+  }
+
+
   lazy val elementChildrenCompileInfo = elementChildrenCompileInfoArg
 
   override def preSerialization: Any = {
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala
index b974cb7..72e0768 100644
--- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala
+++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala
@@ -40,4 +40,14 @@ final class SchemaSetRuntimeData(
   override def schemaFileLocation = elementRuntimeData.schemaFileLocation
   override def SDE(str: String, args: Any*) = elementRuntimeData.SDE(str, args)
 
+  private def writeObject(oos: java.io.ObjectOutputStream): Unit = {
+    oos.defaultWriteObject()
+    elementRuntimeData.dpathElementCompileInfo.serializeParents(oos)
+  }
+
+  private def readObject(ois: java.io.ObjectInputStream): Unit = {
+    ois.defaultReadObject()
+    elementRuntimeData.dpathElementCompileInfo.deserializeParents(ois)
+  }
+
 }


Mime
View raw message