spark-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Raj (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (SPARK-25749) Exception thrown while reading avro file with large schema
Date Tue, 16 Oct 2018 17:41:00 GMT

     [ https://issues.apache.org/jira/browse/SPARK-25749?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Raj updated SPARK-25749:
------------------------
    Attachment: build.sbt

> Exception thrown while reading avro file with large schema
> ----------------------------------------------------------
>
>                 Key: SPARK-25749
>                 URL: https://issues.apache.org/jira/browse/SPARK-25749
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.3.0, 2.3.1, 2.3.2
>            Reporter: Raj
>            Priority: Blocker
>         Attachments: EncoderExample.scala, MainCC.scala, build.sbt
>
>
> Hi, we are migrating our jobs from Spark 2.2.0 to Spark 2.3.1. One of the jobs reads an Avro
source that has a large nested schema. The job fails on Spark 2.3.1 (I have also tested Spark 2.3.0
and Spark 2.3.2, and the job fails on those versions as well). I am able to reproduce this with some
sample data. Please find the code, build file, and exception log below.
> *Code (EncoderExample.scala)*
>  
> package com.rj.enc
> import com.rj.logger.CustomLogger
> import org.apache.log4j.Logger
> import com.rj.sc.SparkUtil
> import org.apache.spark.sql.catalyst.ScalaReflection
> import org.apache.spark.sql.types.StructType
> import org.apache.spark.sql.Encoders
> object EncoderExample {
>  
>  val log: Logger = CustomLogger.getLogger(this.getClass.getName.dropRight(1))
>  val user = "xxx"
>  val sourcePath = s"file:///Users/$user/del/avrodata"
>  val resultPath = s"file:///Users/$user/del/pqdata"
>  
>  def main(args: Array[String]): Unit = {
>  writeData() // Create sample data 
>  readData() // Read, Process & write back the results (App fails in this method for
spark 2.3.1)
>  }
>  
>  def readData(): Unit = {
>  log.info("sourcePath -> " + sourcePath)
>  val ss = SparkUtil.getSparkSession(this.getClass.getName)
>  val schema = ScalaReflection.schemaFor[MainCC].dataType.asInstanceOf[StructType]
>  import com.databricks.spark.avro._
>  import ss.implicits._
>  val ds = ss.sqlContext.read.schema(schema).option("basePath", sourcePath).
>  avro(this.sourcePath).as[MainCC]
>  log.info("Schema -> " + ds.schema.treeString)
>  log.info("Count x -> " + ds.count)
>  val encr = Encoders.product[ResultCC]
>  val res = ds.map{ x =>
>  val es: Long = x.header.tamp
>  ResultCC(es = es)
>  }(encr)
>  res.write.parquet(this.resultPath)
>  }
>  
>  def writeData(): Unit = {
>  val ss = SparkUtil.getSparkSession(this.getClass.getName)
>  import ss.implicits._
>  val ds = ss.sparkContext.parallelize(Seq(MainCC(), MainCC())).toDF//.as[MainCC]
>  log.info("source count 5 -> " + ds.count)
>  import com.databricks.spark.avro._
>  ds.write.avro(this.sourcePath)
>  log.info("Written")
>  }
>  
> }
> final case class ResultCC(
>  es: Long)
> *Case Class (Schema of source avro data)*
> package com.rj.enc
>  
> case class Header(tamp: Long = 12, xy: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
>  
> case class Key(hi: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
>  
> case class L30 (
>   l1: Option[Double] = Some(123d)
> ,l2: Option[Double] = Some(123d)
>  ,l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,l5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,l6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class C45 (
>  r1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,r2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class B45 (
>   e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,e2: Option[Int] = Some(123)
>  ,e3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class D45 (`t1`: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
>  
> case class M30 (
> b1: Option[B45] = Some(B45())
> ,b2: Option[C45] = Some(C45())
> ,b3: Option[D45] = Some(D45())
> )
>  
> case class Y50 (
>    g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class X50 (
>   c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class L10 (
>   u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,u3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>  ,u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,u5: Option[Y50] = Some(Y50())
> ,u6: Option[X50] = Some(X50())
>  ,u7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class Z10 (
>   m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class X10(
>   i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i2: Option[L30] = Some(L30())
>  ,i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ,i4: Option[M30] = Some(M30())
> ,i5: Option[Boolean] = Some(false)
> )
>  
> case class R10 (
>    t1: Option[Long] = Some(123l)
>   ,t2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,t3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,t4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,t5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,t6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,t7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,t8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,t9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,u1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,u2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,u3: Option[Map[String, Option[String]]] = Some(Map.empty)
>   ,u4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> case class A15(
>    h1: Option[R10] = Some(R10())
>   ,h2: Option[X10] = Some(X10())
>   ,h3: Option[L10] = Some(L10())
>   ,h4: Option[Z10] = Some(Z10())
> )
>  
> case class B15(
>    m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
>    n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> ){
>   def toMap: Map[String,String]={
>     Map(
>       ("m1", this.m1),
>       ("m2", this.m2),
>       ("m3", this.m3),
>       ("m4", this.m4),
>       ("m5", this.m5),
>       ("m6", this.m6),
>       ("m7", this.m7),
>       ("m8", this.m8),
>       ("m9", this.m9),
>       ("n1", this.n1),
>       ("n2", this.n2),
>       ("n3", this.n2),
>       ("n4", this.n3)
>     ).map(tup => {
>       val (k,v) = tup
>       (k, v.orNull)
>     })
>   }
> }
>  
> case class Value (
>    a1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,a2: Option[Long] = Some(123l)
>   ,a3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,a4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,a5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,a6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,a7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,a8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,a9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,b9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,c1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,c2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,c3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,c4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,c5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,c6: Option[Double] = Some(1.23d)
>   ,c7: Option[Double] = Some(1.1d)
>   ,c8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,c9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,d1: Option[Int] = Some(123)
>   ,d2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,d3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,d4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,d5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,d6: Option[Long] = Some(123)
>   ,d7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,d9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,e1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,e2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,e3: Option[Int] = Some(123)
>   ,e4: Option[Int] = Some(234)
>   ,e5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,e6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,e7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,e8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,e9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,f9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,g8: Option[Int] = Some(123)
>   ,g9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h1: Option[Long] = Some(123l)
>   ,h2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,h9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,i9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,j9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,k9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,l1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,l2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,l3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,l4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,l5: Option[Int] = Some(123)
>   ,l6: Option[Int] = Some(123)
>   ,l7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,l8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,l9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m3: Option[Map[String, Option[String]]] = Some(Map.empty)
>   ,m4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m5: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m7: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m8: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,m9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,n1: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,n2: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,n3: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,n4: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,n5: Option[Boolean] = Some(true)
>   ,n6: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
>   ,n7: Option[A15] = Some(A15())
>   ,n8: Option[B15] = Some(B15())
>   ,n9: Option[String] = Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
> )
>  
> final case class MainCC(date: Int = 20181008,
>     header: Header = Header(), value: Value = Value(), key: Key = Key())
>  
> +*build.sbt*+
> name := "sparkutil"
> version := "5.0"
> scalaVersion := "2.11.8"
> EclipseKeys.withSource := true
> scalacOptions ++= Seq(
>   "-Ywarn-dead-code",
>   "-Ywarn-unused"
> )
> val sparkVer = "2.3.1"
> libraryDependencies ++= Seq(
>   "org.apache.spark" %% "spark-core" % sparkVer % "provided",
>   "org.apache.spark" %% "spark-sql" % sparkVer % "provided",
>   "org.apache.spark" %% "spark-hive" % sparkVer % "provided", 
> "com.databricks" %% "spark-avro" % "4.0.0",
>   "log4j" % "log4j" % "1.2.17",
>   "com.github.scopt" %% "scopt" % "3.6.0",
>   "com.googlecode.json-simple" % "json-simple" % "1.1.1",
> "com.google.cloud" % "google-cloud-bigquery" % "0.17.1-beta",
>   "com.databricks" %% "spark-avro" % "4.0.0",
>   "org.scalatest" %% "scalatest" % "3.0.5",
>   "com.fasterxml.jackson.dataformat" % "jackson-dataformat-yaml" % "2.8.3"
> )
>  assemblyMergeStrategy in assembly := {
>   case PathList("com", "google", xs @ _*) => MergeStrategy.last
> case PathList("org", "apache", "avro", xs @ _*) => MergeStrategy.last
>   case "project.properties" => MergeStrategy.last
> case PathList("META-INF", xs @ _*) =>
>     (xs map \{_.toLowerCase}) match {
>       case ("manifest.mf" :: Nil) | ("index.list" :: Nil) | ("dependencies" :: Nil)
=> MergeStrategy.discard
>       case _ => MergeStrategy.discard
>     }
>   case _ =>
>     MergeStrategy.first
> }
> +*Exception Thrown*+ (PS: I have also increased the heap size in Eclipse, but that does
not solve the issue)
> 18/10/16 12:28:40 || ERROR || org.apache.spark.util.Utils || logError() || 91 || Aborting
task
> java.lang.OutOfMemoryError: Java heap space
> at java.util.Arrays.copyOf(Arrays.java:3332)
> at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124)
> at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:448)
> at java.lang.StringBuilder.append(StringBuilder.java:136)
> at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:200)
> at scala.collection.TraversableOnce$$anonfun$addString$1.apply(TraversableOnce.scala:364)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at scala.collection.TraversableOnce$class.addString(TraversableOnce.scala:357)
> at scala.collection.AbstractTraversable.addString(Traversable.scala:104)
> at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:323)
> at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)
> at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:325)
> at scala.collection.AbstractTraversable.mkString(Traversable.scala:104)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:137)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$5.apply(RuleExecutor.scala:138)
> at org.apache.spark.internal.Logging$class.logDebug(Logging.scala:58)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.logDebug(RuleExecutor.scala:40)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:134)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
> at scala.collection.immutable.List.foreach(List.scala:381)
> at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$$anonfun$canonicalize$1.apply(GenerateUnsafeProjection.scala:354)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
> at scala.collection.immutable.List.foreach(List.scala:381)
> at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
> at scala.collection.immutable.List.map(List.scala:285)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:354)
> at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.canonicalize(GenerateUnsafeProjection.scala:32)
> at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:1321)
> 18/10/16 12:28:40 || ERROR || org.apache.spark.sql.execution.datasources.FileFormatWriter
|| logError() || 70 || Job job_20181016122823_0005 aborted.
> 18/10/16 12:28:40 || ERROR || org.apache.spark.executor.Executor || logError() || 91
|| Exception in task 0.0 in stage 5.0 (TID 5)
> org.apache.spark.SparkException: Task failed while writing rows.
> at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:285)
> at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:197)
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org


Mime
View raw message