spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yh...@apache.org
Subject spark git commit: [SPARK-17065][SQL] Improve the error message when encountering an incompatible DataSourceRegister
Date Mon, 15 Aug 2016 22:55:55 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8f4cacd3a -> 45036327f


[SPARK-17065][SQL] Improve the error message when encountering an incompatible DataSourceRegister

## What changes were proposed in this pull request?

Add an instruction to ask the user to remove or upgrade the incompatible DataSourceRegister
in the error message.

## How was this patch tested?

Test command:
```
build/sbt -Dscala-2.10 package
SPARK_SCALA_VERSION=2.10 bin/spark-shell --packages ai.h2o:sparkling-water-core_2.10:1.6.5

scala> Seq(1).toDS().write.format("parquet").save("foo")
```

Before:
```
java.util.ServiceConfigurationError: org.apache.spark.sql.sources.DataSourceRegister: Provider
org.apache.spark.h2o.DefaultSource could not be instantiated
	at java.util.ServiceLoader.fail(ServiceLoader.java:232)
	at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
	at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
	at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
...
Caused by: java.lang.NoClassDefFoundError: org/apache/spark/Logging
	at java.lang.ClassLoader.defineClass1(Native Method)
	at java.lang.ClassLoader.defineClass(ClassLoader.java:760)
	at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
	at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
	at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
	at java.security.AccessController.doPrivileged(Native Method)
...
```

After:

```
java.lang.ClassNotFoundException: Detected an incompatible DataSourceRegister. Please remove
the incompatible library from classpath or upgrade it. Error: org.apache.spark.sql.sources.DataSourceRegister:
Provider org.apache.spark.h2o.DefaultSource could not be instantiated
	at org.apache.spark.sql.execution.datasources.DataSource.lookupDataSource(DataSource.scala:178)
	at org.apache.spark.sql.execution.datasources.DataSource.providingClass$lzycompute(DataSource.scala:79)
	at org.apache.spark.sql.execution.datasources.DataSource.providingClass(DataSource.scala:79)
	at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:441)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:213)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:196)
...
```

Author: Shixiong Zhu <shixiong@databricks.com>

Closes #14651 from zsxwing/SPARK-17065.

(cherry picked from commit 268b71d0d792f875fcfaec5314862236754a00d6)
Signed-off-by: Yin Huai <yhuai@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45036327
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45036327
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45036327

Branch: refs/heads/branch-2.0
Commit: 45036327fdbdb0167b3c53245fce9dc2be67ffe9
Parents: 8f4cacd
Author: Shixiong Zhu <shixiong@databricks.com>
Authored: Mon Aug 15 15:55:32 2016 -0700
Committer: Yin Huai <yhuai@databricks.com>
Committed: Mon Aug 15 15:55:50 2016 -0700

----------------------------------------------------------------------
 .../sql/execution/datasources/DataSource.scala  | 91 +++++++++++---------
 1 file changed, 52 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/45036327/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index f572b93..f5727da 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution.datasources
 
-import java.util.ServiceLoader
+import java.util.{ServiceConfigurationError, ServiceLoader}
 
 import scala.collection.JavaConverters._
 import scala.language.{existentials, implicitConversions}
@@ -123,50 +123,63 @@ case class DataSource(
     val loader = Utils.getContextOrSparkClassLoader
     val serviceLoader = ServiceLoader.load(classOf[DataSourceRegister], loader)
 
-    serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider)).toList match {
-      // the provider format did not match any given registered aliases
-      case Nil =>
-        try {
-          Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match
{
-            case Success(dataSource) =>
-              // Found the data source using fully qualified path
-              dataSource
-            case Failure(error) =>
-              if (provider.toLowerCase == "orc" ||
+    try {
+      serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider)).toList match
{
+        // the provider format did not match any given registered aliases
+        case Nil =>
+          try {
+            Try(loader.loadClass(provider)).orElse(Try(loader.loadClass(provider2))) match
{
+              case Success(dataSource) =>
+                // Found the data source using fully qualified path
+                dataSource
+              case Failure(error) =>
+                if (provider.toLowerCase == "orc" ||
                   provider.startsWith("org.apache.spark.sql.hive.orc")) {
-                throw new AnalysisException(
-                  "The ORC data source must be used with Hive support enabled")
-              } else if (provider.toLowerCase == "avro" ||
+                  throw new AnalysisException(
+                    "The ORC data source must be used with Hive support enabled")
+                } else if (provider.toLowerCase == "avro" ||
                   provider == "com.databricks.spark.avro") {
-                throw new AnalysisException(
-                  s"Failed to find data source: ${provider.toLowerCase}. Please use Spark
" +
-                    "package http://spark-packages.org/package/databricks/spark-avro")
+                  throw new AnalysisException(
+                    s"Failed to find data source: ${provider.toLowerCase}. Please use Spark
" +
+                      "package http://spark-packages.org/package/databricks/spark-avro")
+                } else {
+                  throw new ClassNotFoundException(
+                    s"Failed to find data source: $provider. Please find packages at " +
+                      "http://spark-packages.org",
+                    error)
+                }
+            }
+          } catch {
+            case e: NoClassDefFoundError => // This one won't be caught by Scala NonFatal
+              // NoClassDefFoundError's class name uses "/" rather than "." for packages
+              val className = e.getMessage.replaceAll("/", ".")
+              if (spark2RemovedClasses.contains(className)) {
+                throw new ClassNotFoundException(s"$className was removed in Spark 2.0. "
+
+                  "Please check if your library is compatible with Spark 2.0", e)
               } else {
-                throw new ClassNotFoundException(
-                  s"Failed to find data source: $provider. Please find packages at " +
-                    "http://spark-packages.org",
-                  error)
+                throw e
               }
           }
-        } catch {
-          case e: NoClassDefFoundError => // This one won't be caught by Scala NonFatal
-            // NoClassDefFoundError's class name uses "/" rather than "." for packages
-            val className = e.getMessage.replaceAll("/", ".")
-            if (spark2RemovedClasses.contains(className)) {
-              throw new ClassNotFoundException(s"$className was removed in Spark 2.0. " +
-                "Please check if your library is compatible with Spark 2.0", e)
-            } else {
-              throw e
-            }
+        case head :: Nil =>
+          // there is exactly one registered alias
+          head.getClass
+        case sources =>
+          // There are multiple registered aliases for the input
+          sys.error(s"Multiple sources found for $provider " +
+            s"(${sources.map(_.getClass.getName).mkString(", ")}), " +
+            "please specify the fully qualified class name.")
+      }
+    } catch {
+      case e: ServiceConfigurationError if e.getCause.isInstanceOf[NoClassDefFoundError]
=>
+        // NoClassDefFoundError's class name uses "/" rather than "." for packages
+        val className = e.getCause.getMessage.replaceAll("/", ".")
+        if (spark2RemovedClasses.contains(className)) {
+          throw new ClassNotFoundException(s"Detected an incompatible DataSourceRegister.
" +
+            "Please remove the incompatible library from classpath or upgrade it. " +
+            s"Error: ${e.getMessage}", e)
+        } else {
+          throw e
         }
-      case head :: Nil =>
-        // there is exactly one registered alias
-        head.getClass
-      case sources =>
-        // There are multiple registered aliases for the input
-        sys.error(s"Multiple sources found for $provider " +
-          s"(${sources.map(_.getClass.getName).mkString(", ")}), " +
-          "please specify the fully qualified class name.")
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message