spark-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Izek Greenfield (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (SPARK-25028) AnalyzePartitionCommand failed with NPE if value is null
Date Wed, 08 Aug 2018 10:25:00 GMT

     [ https://issues.apache.org/jira/browse/SPARK-25028?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Izek Greenfield updated SPARK-25028:
------------------------------------
    Description: 
on line 143: val partitionColumnValues = partitionColumns.indices.map(r.get(_).toString)

If the value is NULL the code will fail with NPE

*sample:*

{code:scala}
val df = List((1, null , "first"), (2, null , "second")).toDF("index", "name", "value").withColumn("name",
$"name".cast("string"))
df.write.partitionBy("name").saveAsTable("df13")
spark.sql("ANALYZE TABLE df13 PARTITION (name) COMPUTE STATISTICS")
{code}

output:

2018-08-08 09:25:43 WARN  BaseSessionStateBuilder$$anon$1:66 - Max iterations (2) reached for batch Resolution
java.lang.NullPointerException
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.immutable.Range.foreach(Range.scala:160)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.AbstractTraversable.map(Traversable.scala:104)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:143)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:142)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.calculateRowCountsPerPartition(AnalyzePartitionCommand.scala:142)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.run(AnalyzePartitionCommand.scala:104)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
  at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3254)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3253)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:190)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641)
  ... 49 elided

  was:
on line 143: val partitionColumnValues = partitionColumns.indices.map(r.get(_).toString)

If the value is NULL the code will fail with NPE

sample:

val df = List((1, null , "first"), (2, null , "second")).toDF("index", "name", "value").withColumn("name",
$"name".cast("string"))
df.write.partitionBy("name").saveAsTable("df13")
spark.sql("ANALYZE TABLE df13 PARTITION (name) COMPUTE STATISTICS")


output:

2018-08-08 09:25:43 WARN  BaseSessionStateBuilder$$anon$1:66 - Max iterations (2) reached for batch Resolution
java.lang.NullPointerException
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.immutable.Range.foreach(Range.scala:160)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.AbstractTraversable.map(Traversable.scala:104)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:143)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:142)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.calculateRowCountsPerPartition(AnalyzePartitionCommand.scala:142)
  at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.run(AnalyzePartitionCommand.scala:104)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
  at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3254)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3253)
  at org.apache.spark.sql.Dataset.<init>(Dataset.scala:190)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641)
  ... 49 elided


> AnalyzePartitionCommand failed with NPE if value is null
> --------------------------------------------------------
>
>                 Key: SPARK-25028
>                 URL: https://issues.apache.org/jira/browse/SPARK-25028
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 2.4.0
>            Reporter: Izek Greenfield
>            Priority: Major
>
> on line 143: val partitionColumnValues = partitionColumns.indices.map(r.get(_).toString)
> If the value is NULL the code will fail with NPE
> *sample:*
> {code:scala}
> val df = List((1, null , "first"), (2, null , "second")).toDF("index", "name", "value").withColumn("name", $"name".cast("string"))
> df.write.partitionBy("name").saveAsTable("df13")
> spark.sql("ANALYZE TABLE df13 PARTITION (name) COMPUTE STATISTICS")
> {code}
> output:
> 2018-08-08 09:25:43 WARN  BaseSessionStateBuilder$$anon$1:66 - Max iterations (2) reached for batch Resolution
> java.lang.NullPointerException
>   at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143)
>   at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1$$anonfun$8.apply(AnalyzePartitionCommand.scala:143)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.immutable.Range.foreach(Range.scala:160)
>   at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
>   at scala.collection.AbstractTraversable.map(Traversable.scala:104)
>   at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:143)
>   at org.apache.spark.sql.execution.command.AnalyzePartitionCommand$$anonfun$calculateRowCountsPerPartition$1.apply(AnalyzePartitionCommand.scala:142)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>   at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
>   at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
>   at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
>   at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
>   at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.calculateRowCountsPerPartition(AnalyzePartitionCommand.scala:142)
>   at org.apache.spark.sql.execution.command.AnalyzePartitionCommand.run(AnalyzePartitionCommand.scala:104)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
>   at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
>   at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
>   at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:190)
>   at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3254)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3253)
>   at org.apache.spark.sql.Dataset.<init>(Dataset.scala:190)
>   at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:75)
>   at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:641)
>   ... 49 elided



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org


Mime
View raw message