spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Mich Talebzadeh <mich.talebza...@gmail.com>
Subject Re: SQLContext and HiveContext parse a query string differently ?
Date Thu, 12 May 2016 21:20:29 GMT
Yep, I got the same error:

root
 |-- a: array (nullable = true)
 |    |-- element: integer (containsNull = false)
 |-- b: integer (nullable = false)
NoViableAltException(35@[])
        at
org.apache.hadoop.hive.ql.parse.HiveParser.primitiveType(HiveParser.java:38886)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.castExpression(HiveParser_IdentifiersParser.java:4336)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.atomExpression(HiveParser_IdentifiersParser.java:6235)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceFieldExpression(HiveParser_IdentifiersParser.java:6383)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceUnaryPrefixExpression(HiveParser_IdentifiersParser.java:6768)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceUnarySuffixExpression(HiveParser_IdentifiersParser.java:6828)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceBitwiseXorExpression(HiveParser_IdentifiersParser.java:7012)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceStarExpression(HiveParser_IdentifiersParser.java:7172)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedencePlusExpression(HiveParser_IdentifiersParser.java:7332)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceAmpersandExpression(HiveParser_IdentifiersParser.java:7483)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceBitwiseOrExpression(HiveParser_IdentifiersParser.java:7634)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceEqualExpression(HiveParser_IdentifiersParser.java:8164)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceNotExpression(HiveParser_IdentifiersParser.java:9177)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceAndExpression(HiveParser_IdentifiersParser.java:9296)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.precedenceOrExpression(HiveParser_IdentifiersParser.java:9455)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.expression(HiveParser_IdentifiersParser.java:6105)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.expression(HiveParser.java:45846)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_SelectClauseParser.selectItem(HiveParser_SelectClauseParser.java:2907)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_SelectClauseParser.selectList(HiveParser_SelectClauseParser.java:1373)
        at
org.apache.hadoop.hive.ql.parse.HiveParser_SelectClauseParser.selectClause(HiveParser_SelectClauseParser.java:1128)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.selectClause(HiveParser.java:45817)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.selectStatement(HiveParser.java:41495)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.regularBody(HiveParser.java:41402)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.queryStatementExpressionBody(HiveParser.java:40413)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.queryStatementExpression(HiveParser.java:40283)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.execStatement(HiveParser.java:1590)
        at
org.apache.hadoop.hive.ql.parse.HiveParser.statement(HiveParser.java:1109)
        at
org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:202)
        at
org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:166)
        at org.apache.spark.sql.hive.HiveQl$.getAst(HiveQl.scala:276)
        at org.apache.spark.sql.hive.HiveQl$.createPlan(HiveQl.scala:303)
        at
org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:41)
        at
org.apache.spark.sql.hive.ExtendedHiveQlParser$$anonfun$hiveQl$1.apply(ExtendedHiveQlParser.scala:40)
        at
scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136)
        at
scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242)
        at
scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
        at
scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891)
        at
scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
        at
scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890)
        at
scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110)
        at
org.apache.spark.sql.catalyst.AbstractSparkSQLParser.parse(AbstractSparkSQLParser.scala:34)
        at org.apache.spark.sql.hive.HiveQl$.parseSql(HiveQl.scala:295)
        at
org.apache.spark.sql.hive.HiveQLDialect$$anonfun$parse$1.apply(HiveContext.scala:66)
        at
org.apache.spark.sql.hive.HiveQLDialect$$anonfun$parse$1.apply(HiveContext.scala:66)
        at
org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:290)
        at
org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:237)
        at
org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:236)
        at
org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:279)
        at
org.apache.spark.sql.hive.HiveQLDialect.parse(HiveContext.scala:65)
        at
org.apache.spark.sql.SQLContext$$anonfun$2.apply(SQLContext.scala:211)
        at
org.apache.spark.sql.SQLContext$$anonfun$2.apply(SQLContext.scala:211)
        at
org.apache.spark.sql.execution.SparkSQLParser$$anonfun$org$apache$spark$sql$execution$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:114)
        at
org.apache.spark.sql.execution.SparkSQLParser$$anonfun$org$apache$spark$sql$execution$SparkSQLParser$$others$1.apply(SparkSQLParser.scala:113)
        at
scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:136)
        at
scala.util.parsing.combinator.Parsers$Success.map(Parsers.scala:135)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$map$1.apply(Parsers.scala:242)
        at
scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1$$anonfun$apply$2.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$Failure.append(Parsers.scala:202)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$Parser$$anonfun$append$1.apply(Parsers.scala:254)
        at
scala.util.parsing.combinator.Parsers$$anon$3.apply(Parsers.scala:222)
        at
scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891)
        at
scala.util.parsing.combinator.Parsers$$anon$2$$anonfun$apply$14.apply(Parsers.scala:891)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
        at
scala.util.parsing.combinator.Parsers$$anon$2.apply(Parsers.scala:890)
        at
scala.util.parsing.combinator.PackratParsers$$anon$1.apply(PackratParsers.scala:110)
        at
org.apache.spark.sql.catalyst.AbstractSparkSQLParser.parse(AbstractSparkSQLParser.scala:34)
        at
org.apache.spark.sql.SQLContext$$anonfun$1.apply(SQLContext.scala:208)
        at
org.apache.spark.sql.SQLContext$$anonfun$1.apply(SQLContext.scala:208)
        at
org.apache.spark.sql.execution.datasources.DDLParser.parse(DDLParser.scala:43)
        at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:231)
        at
org.apache.spark.sql.hive.HiveContext.parseSql(HiveContext.scala:331)
        at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817)
        at Test$delayedInit$body.apply(Test.scala:21)
        at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
        at
scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
        at scala.App$$anonfun$main$1.apply(App.scala:71)
        at scala.App$$anonfun$main$1.apply(App.scala:71)
        at scala.collection.immutable.List.foreach(List.scala:318)
        at
scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:32)
        at scala.App$class.main(App.scala:71)
        at Test$.main(Test.scala:5)
        at Test.main(Test.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
        at
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
        at
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot
recognize input near 'array' '<' 'string' in primitive type specification;
line 1 pos 17


Let me investigate it further


Dr Mich Talebzadeh



LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
<https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>*



http://talebzadehmich.wordpress.com



On 12 May 2016 at 12:09, Hao Ren <invkrh@gmail.com> wrote:

> Hi,
>
> I just want to figure out why the two contexts behave differently even
> on a simple query.
> In a nutshell, I have a query in which there is a String containing a single
> quote and a cast to Array/Map.
> I have tried all the combinations of the different types of SQL context and
> query call API (sql, df.select, df.selectExpr).
> I can't find one that works in all cases.
>
> Here is the code for reproducing the problem.
>
> -----------------------------------------------------------------------------
>
> import org.apache.spark.sql.SQLContext
> import org.apache.spark.sql.hive.HiveContext
> import org.apache.spark.{SparkConf, SparkContext}
>
> object Test extends App {
>
>   val sc          = new SparkContext("local[2]", "test", new SparkConf)
>   val hiveContext = new HiveContext(sc)
>   val sqlContext  = new SQLContext(sc)
>
>   val context = hiveContext
>   //  val context = sqlContext
>
>   import context.implicits._
>
>   val df = Seq((Seq(1, 2), 2)).toDF("a", "b")
>   df.registerTempTable("tbl")
>   df.printSchema()
>
>   // case 1
>   context.sql("select cast(a as array<string>) from tbl").show()
>   // HiveContext => org.apache.spark.sql.AnalysisException: cannot recognize input near 'array' '<' 'string' in primitive type specification; line 1 pos 17
>   // SQLContext => OK
>
>   // case 2
>   context.sql("select 'a\\'b'").show()
>   // HiveContext => OK
>   // SQLContext => failure: ``union'' expected but ErrorToken(unclosed string literal) found
>
>   // case 3
>   df.selectExpr("cast(a as array<string>)").show() // OK with HiveContext and SQLContext
>
>   // case 4
>   df.selectExpr("'a\\'b'").show() // HiveContext, SQLContext => failure: end of input expected
> }
>
> -----------------------------------------------------------------------------
>
> Any clarification / workaround is highly appreciated.
>
> --
> Hao Ren
>
> Data Engineer @ leboncoin
>
> Paris, France
>

Mime
View raw message