hbase-dev mailing list archives

From: Ted Yu <yuzhih...@gmail.com>
Subject: Re: spark 2 segfault
Date: Mon, 02 May 2016 04:09:46 GMT
Using commit hash 90787de864b58a1079c23e6581381ca8ffe7685f and Java 1.7.0_67, I got:

scala> val dfComplicated = sc.parallelize(List((Map("1" -> "a"), List("b", "c")), (Map("2" -> "b"), List("d", "e")))).toDF
...
dfComplicated: org.apache.spark.sql.DataFrame = [_1: map<string,string>, _2: array<string>]

scala> dfComplicated.show
java.lang.OutOfMemoryError: Java heap space
  at org.apache.spark.unsafe.types.UTF8String.getBytes(UTF8String.java:229)
  at org.apache.spark.unsafe.types.UTF8String.toString(UTF8String.java:821)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:241)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1$$anonfun$apply$13.apply(Dataset.scala:2121)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1$$anonfun$apply$13.apply(Dataset.scala:2121)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
  at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2121)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:54)
  at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2408)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2120)
  at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2127)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1861)
  at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1860)
  at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2438)
  at org.apache.spark.sql.Dataset.head(Dataset.scala:1860)
  at org.apache.spark.sql.Dataset.take(Dataset.scala:2077)
  at org.apache.spark.sql.Dataset.showString(Dataset.scala:238)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:529)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:489)
  at org.apache.spark.sql.Dataset.show(Dataset.scala:498)
  ... 6 elided

scala>
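
For reference, the write/read round-trip that Koert describes below can be sketched in the same shell session. This is only a minimal sketch, not his actual test: the /tmp path is illustrative, parquet is just one of the data sources he reports failing, and sqlContext is assumed to be the instance the stock spark-shell provides.

scala> dfComplicated.write.mode("overwrite").parquet("/tmp/dfComplicated") // illustrative path and format

scala> sqlContext.read.parquet("/tmp/dfComplicated").collect() // materializing forces the same row-decoding path (SpecificSafeProjection / ExpressionEncoder.fromRow) seen in the traces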

On Sun, May 1, 2016 at 8:34 PM, Koert Kuipers <koert@tresata.com> wrote:

> By removing dependencies it turns into a different error; see below.
> The test simply writes a DataFrame out to a file and reads it back in.
> I see the error for all data sources (json, parquet, etc.).
>
> This is the data that I write out and read back in:
> val dfComplicated = sc.parallelize(List((Map("1" -> "a"), List("b", "c")), (Map("2" -> "b"), List("d", "e")))).toDF
>
>
> [info]   java.lang.RuntimeException: Error while decoding: java.lang.NegativeArraySizeException
> [info] createexternalrow(if (isnull(input[0, map<string,string>])) null else staticinvoke(class org.apache.spark.sql.catalyst.util.ArrayBasedMapData$, ObjectType(interface scala.collection.Map), toScalaMap, staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType), lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType).toString, input[0, map<string,string>].keyArray).array, true), staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType), lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType).toString, input[0, map<string,string>].valueArray).array, true), true), if (isnull(input[1, array<string>])) null else staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType), lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType).toString, input[1, array<string>]).array, true), StructField(_1,MapType(StringType,StringType,true),true), StructField(_2,ArrayType(StringType,true),true))
> [info] :- if (isnull(input[0, map<string,string>])) null else staticinvoke(class org.apache.spark.sql.catalyst.util.ArrayBasedMapData$, ObjectType(interface scala.collection.Map), toScalaMap, staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType), lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType).toString, input[0, map<string,string>].keyArray).array, true), staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType), lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType).toString, input[0, map<string,string>].valueArray).array, true), true)
> [info] :  :- isnull(input[0, map<string,string>])
> [info] :  :  +- input[0, map<string,string>]
> [info] :  :- null
> [info] :  +- staticinvoke(class org.apache.spark.sql.catalyst.util.ArrayBasedMapData$, ObjectType(interface scala.collection.Map), toScalaMap, staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType), lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType).toString, input[0, map<string,string>].keyArray).array, true), staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType), lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType).toString, input[0, map<string,string>].valueArray).array, true), true)
> [info] :     :- staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType), lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType).toString, input[0, map<string,string>].keyArray).array, true)
> [info] :     :  +- mapobjects(lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType), lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType).toString, input[0, map<string,string>].keyArray).array
> [info] :     :     +- mapobjects(lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType), lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType).toString, input[0, map<string,string>].keyArray)
> [info] :     :        :- lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType).toString
> [info] :     :        :  +- lambdavariable(MapObjects_loopValue16, MapObjects_loopIsNull17, StringType)
> [info] :     :        +- input[0, map<string,string>].keyArray
> [info] :     :           +- input[0, map<string,string>]
> [info] :     +- staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType), lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType).toString, input[0, map<string,string>].valueArray).array, true)
> [info] :        +- mapobjects(lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType), lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType).toString, input[0, map<string,string>].valueArray).array
> [info] :           +- mapobjects(lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType), lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType).toString, input[0, map<string,string>].valueArray)
> [info] :              :- lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType).toString
> [info] :              :  +- lambdavariable(MapObjects_loopValue18, MapObjects_loopIsNull19, StringType)
> [info] :              +- input[0, map<string,string>].valueArray
> [info] :                 +- input[0, map<string,string>]
> [info] +- if (isnull(input[1, array<string>])) null else staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType), lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType).toString, input[1, array<string>]).array, true)
> [info]    :- isnull(input[1, array<string>])
> [info]    :  +- input[1, array<string>]
> [info]    :- null
> [info]    +- staticinvoke(class scala.collection.mutable.WrappedArray$, ObjectType(interface scala.collection.Seq), make, mapobjects(lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType), lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType).toString, input[1, array<string>]).array, true)
> [info]       +- mapobjects(lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType), lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType).toString, input[1, array<string>]).array
> [info]          +- mapobjects(lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType), lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType).toString, input[1, array<string>])
> [info]             :- lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType).toString
> [info]             :  +- lambdavariable(MapObjects_loopValue20, MapObjects_loopIsNull21, StringType)
> [info]             +- input[1, array<string>]
> [info]   at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(ExpressionEncoder.scala:244)
> [info]   at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1$$anonfun$apply$13.apply(Dataset.scala:2121)
> [info]   at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1$$anonfun$apply$13.apply(Dataset.scala:2121)
> [info]   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
> [info]   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
> [info]   at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
> [info]   at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
> [info]   at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
> [info]   at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
> [info]   at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2121)
> [info]   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:54)
> [info]   at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2408)
> [info]   at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2120)
> [info]   at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collect$1.apply(Dataset.scala:2125)
> [info]   at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collect$1.apply(Dataset.scala:2125)
> [info]   at org.apache.spark.sql.Dataset.withCallback(Dataset.scala:2421)
> [info]   at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2125)
> [info]   at org.apache.spark.sql.Dataset.collect(Dataset.scala:2101)
>
>
>
> On Thu, Apr 28, 2016 at 2:41 PM, Ted Yu <yuzhihong@gmail.com> wrote:
>
>> Are you able to pastebin a unit test which can reproduce the following ?
>>
>> Thanks
>>
>> On Apr 28, 2016, at 11:35 AM, Koert Kuipers <koert@tresata.com> wrote:
>>
>> I tried for the first time to run our own in-house unit tests on Spark 2,
>> and I get the error below. Has anyone seen this?
>>
>> It is reproducible; I tried the latest Java 7 and it is still there.
>>
>> # A fatal error has been detected by the Java Runtime Environment:
>> #
>> #  SIGSEGV (0xb) at pc=0x00007f7c3a4b1f54, pid=21939, tid=140171011417856
>> #
>> # JRE version: Java(TM) SE Runtime Environment (7.0_75-b13) (build 1.7.0_75-b13)
>> # Java VM: Java HotSpot(TM) 64-Bit Server VM (24.75-b04 mixed mode linux-amd64 compressed oops)
>> # Problematic frame:
>> # V  [libjvm.so+0x747f54]  _Copy_arrayof_conjoint_jlongs+0x44
>>
>> More info:
>>
>> Stack: [0x00007f7c1b47e000,0x00007f7c1b57f000],  sp=0x00007f7c1b57a9a8, free space=1010k
>> Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
>> V  [libjvm.so+0x747f54]  _Copy_arrayof_conjoint_jlongs+0x44
>> j  sun.misc.Unsafe.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V+0
>> j  org.apache.spark.unsafe.Platform.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V+34
>> j  org.apache.spark.unsafe.types.UTF8String.getBytes()[B+76
>> j  org.apache.spark.unsafe.types.UTF8String.toString()Ljava/lang/String;+5
>> j  org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Ljava/lang/Object;)Ljava/lang/Object;+876
>> j  org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.fromRow(Lorg/apache/spark/sql/catalyst/InternalRow;)Ljava/lang/Object;+5
>> j  org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1$$anonfun$apply$13.apply(Lorg/apache/spark/sql/catalyst/InternalRow;)Ljava/lang/Object;+11
>> j  org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1$$anonfun$apply$13.apply(Ljava/lang/Object;)Ljava/lang/Object;+5
>> J 13277 C2 scala.collection.mutable.ArrayOps$ofRef.map(Lscala/Function1;Lscala/collection/generic/CanBuildFrom;)Ljava/lang/Object; (7 bytes) @ 0x00007f7c25eeae08 [0x00007f7c25eead40+0xc8]
>> j  org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply()Ljava/lang/Object;+43
>> j  org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(Lorg/apache/spark/sql/SparkSession;Lorg/apache/spark/sql/execution/QueryExecution;Lscala/Function0;)Ljava/lang/Object;+106
>> j  org.apache.spark.sql.Dataset.withNewExecutionId(Lscala/Function0;)Ljava/lang/Object;+12
>> j  org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1()Ljava/lang/Object;+9
>> j  org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collect$1.apply(Lorg/apache/spark/sql/Dataset;)Ljava/lang/Object;+4
>> j  org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collect$1.apply(Ljava/lang/Object;)Ljava/lang/Object;+5
>> j  org.apache.spark.sql.Dataset.withCallback(Ljava/lang/String;Lorg/apache/spark/sql/Dataset;Lscala/Function1;)Ljava/lang/Object;+25
>> j  org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Z)Ljava/lang/Object;+20
>> j  org.apache.spark.sql.Dataset.collect()Ljava/lang/Object;+2
>>
>>
>
