spark-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Marcelo Vanzin (JIRA)" <j...@apache.org>
Subject [jira] [Resolved] (SPARK-11851) Unable to start spark thrift server against secured hive metastore(GSS initiate failed)
Date Wed, 09 May 2018 20:20:00 GMT

     [ https://issues.apache.org/jira/browse/SPARK-11851?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Marcelo Vanzin resolved SPARK-11851.
------------------------------------
    Resolution: Duplicate

> Unable to start spark thrift server against secured hive metastore(GSS initiate failed)
> ---------------------------------------------------------------------------------------
>
>                 Key: SPARK-11851
>                 URL: https://issues.apache.org/jira/browse/SPARK-11851
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.5.0, 1.5.2
>         Environment: Centos x64
>            Reporter: bit1129
>            Priority: Critical
>
> I am using Spark 1.5.X to work with Hive 0.14.0. 
> a. spark-defaults.conf:
> {code}
> spark.sql.hive.metastore.version 0.14.0
> spark.sql.hive.metastore.jars hadoop 2.6.0 jars:hive 0.14.0 jars
> {code}
> b. hive-site.conf
> {code}
> <?xml version="1.0"?>
> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
> <configuration>
>   <property>
>     <name>javax.jdo.option.ConnectionURL</name>
>     <!--<value>jdbc:mysql://my.domain/metastore</value> -->
>     <value>jdbc:mysql://my.domain/metestore2</value>
>     <description>the URL of the MySQL database</description>
>   </property>
>   <property>
>     <name>javax.jdo.option.ConnectionDriverName</name>
>     <value>com.mysql.jdbc.Driver</value>
>   </property>
>   <property>
>     <name>javax.jdo.option.ConnectionUserName</name>
>     <value>hive</value>
>   </property>
>   <property>
>     <name>javax.jdo.option.ConnectionPassword</name>
>     <value>hive</value>
>   </property>
>   <property>
>     <name>datanucleus.autoCreateSchema</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>datanucleus.fixedDatastore</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>datanucleus.autoStartMechanism</name>
>     <value>SchemaTable</value>
>   </property>
>   <property>
>     <name>hive.exec.max.dynamic.partitions</name>
>     <value>100000</value>
>   </property>
>   <property>
>     <name>hive.exec.max.dynamic.partitions.pernode</name>
>     <value>10000</value>
>   </property>
>   <!-- rename bug workaround https://issues.apache.org/jira/browse/HIVE-3815 -->
>   <property>
>     <name>fs.hdfs.impl.disable.cache</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>fs.file.impl.disable.cache</name>
>     <value>false</value>
>   </property>
>   <!-- memory leak workaround https://issues.apache.org/jira/browse/HIVE-4501-->
>    <property>
>     <name>hive.server2.thrift.http.max.worker.threads</name>
>     <value>5000</value>
>   </property>
>   <property>
>     <name>hive.metastore.warehouse.dir</name>
>     <value>hdfs:/user/hive/warehouse</value>
>   </property>
>   <property>
>     <name>hive.exec.max.dynamic.partitions.pernode</name>
>     <value>10000</value>
>   </property>
>   <property>
>     <name>hive.exec.max.dynamic.partitions</name>
>     <value>10000</value>
>   </property>
>   <property>
>     <name>mapred.output.compress</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>hive.exec.compress.output</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>mapred.output.compression.type</name>
>     <value>BLOCK</value>
>   </property>
>   <property>
>     <name>mapreduce.input.fileinputformat.split.minsize</name>
>     <value>134217728</value>
>   </property>
>   <property>
>     <name>mapreduce.input.fileinputformat.split.maxsize</name>
>     <value>1000000000</value>
>   </property>
>   <property>
>     <name>mapred.child.java.opts</name>
>     <value>-Xmx1024m</value>
>   </property>
>   <property>
>     <name>mapreduce.map.memory.mb</name>
>     <value>1024</value>
>   </property>
>   <property>
>     <name>mapreduce.reduce.memory.mb</name>
>     <value>1024</value>
>   </property>
>   <!--
>   <property>
>     <name>hive.mapred.map.tasks.speculative.execution</name>
>     <value>false</value>
>   </property>
>   -->
>   <property>
>     <name>hive.mapred.reduce.tasks.speculative.execution</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>mapred.map.tasks.speculative.execution</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>mapred.reduce.tasks.speculative.execution</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>mapreduce.job.queuename</name>
>     <value>mapreduce</value>
>   </property>
>   <property>
>     <name>hive.metastore.client.socket.timeout</name>
>     <value>600</value>
>   </property>
>   <property>
>     <name>hive.auto.convert.join.noconditionaltask.size</name>
>     <value>671088000</value>
>   </property>
>   
>   <property>
>     <name>hive.server2.authentication</name>
>     <value>KERBEROS</value>
>   </property>
>   <property>
>     <name>hive.server2.authentication.kerberos.principal</name>
>     <value>hive/_HOST@HADOOP.HAP</value>
>   </property>
>   <property>
>     <name>hive.server2.authentication.kerberos.keytab</name>
>     <value>/tmp/hive.keytab</value>
>   </property>
>   <property>
>     <name>hive.metastore.sasl.enabled</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>hive.metastore.kerberos.keytab.file</name>
>     <value>/export/keytabs_conf/hive.keytab</value>
>   </property>
>   <property>
>     <name>hive.metastore.kerberos.principal</name>
>     <value>hive/_HOST@HADOOP.HAP</value>
>   </property>
>   <property>
>     <name>hive.metastore.uris</name>
>     <value>thrift://my.domain:9083</value>
>   </property>
>   <property>
>     <name>hive.server2.support.dynamic.service.discovery</name>
>     <value>true</value>
>   </property>
>  
>   <!--hive security-->
>   <property>
>     <name>hive.security.authorization.enabled</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>hive.security.authorization.createtable.owner.grants</name>
>     <value>ALL</value>
>   </property>
>   <property>
>     <name>hive.security.authorization.task.factory</name>
>     <value>org.apache.hadoop.hive.ql.parse.authorization.HiveAuthorizationTaskFactoryImpl</value>
>   </property>
>   <property>
>     <name>hive.server2.enable.doAs</name>
>     <value>false</value>
>   </property>
>   <property>
>     <name>hive.warehouse.subdir.inherit.perms</name>
>     <value>true</value>
>   </property>
>   <!-- hive Storage Based Authorization-->
>   <!--
>   <property>
>     <name>hive.metastore.pre.event.listeners</name>
>     <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
>   </property>
>   <property>
>     <name>hive.security.metastore.authorization.manager</name>
>     <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
>   </property>
>   <property>
>     <name>hive.security.metastore.authenticator.manager</name>
>     <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
>   </property>
>   <property>
>     <name>hive.security.metastore.authorization.auth.reads</name>
>     <value>true</value>
>   </property>
>   -->
>   <!--  SQL Standard Based Hive Authorization-->
>   <property>
>     <name>hive.users.in.admin.role</name>
>     <value>hive,test109</value>
>   </property>
>   <property>
>     <name>hive.security.authorization.manager</name>
>     <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
>   </property>
>   <property>
>     <name>hive.security.authenticator.manager</name>
>     <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
>   </property>
>   <property>
>     <name>hive.server2.map.fair.scheduler.queue</name>
>     <value>false</value>
>   </property>
>   
>   <!-- https://issues.apache.org/jira/browse/SPARK-11021 -->
>     <property>
>         <name>hive.exec.stagingdir</name>
>         <value>/tmp/hive/spark-stagingdir</value>
>     </property>
>   
> </configuration>
> {code}
> The steps to start up Spark 1.5.x:
> 1. 
> {code}
> kinit -kt /tmp/xx.keytab hive/xxx
> {code}
> 2. Start up the Spark Thrift Server
> {code}
> sbin/start-thriftserver.sh --master yarn-client --num-executors 2
> {code}
> The following exception is thrown during startup:
> {code}
> 15/11/19 15:39:59 ERROR TSaslTransport: SASL negotiation failure
> javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]
> 	at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:212)
> 	at org.apache.thrift.transport.TSaslClientTransport.handleSaslStartMessage(TSaslClientTransport.java:94)
> 	at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:253)
> 	at org.apache.thrift.transport.TSaslClientTransport.open(TSaslClientTransport.java:37)
> 	at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:52)
> 	at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:49)
> 	at java.security.AccessController.doPrivileged(Native Method)
> 	at javax.security.auth.Subject.doAs(Subject.java:415)
> 	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
> 	at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport.open(TUGIAssumingTransport.java:49)
> 	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:358)
> 	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:215)
> 	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:73)
> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
> 	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> 	at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
> 	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1447)
> 	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:63)
> 	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:73)
> 	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:2661)
> 	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:2680)
> 	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:425)
> 	at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:171)
> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
> 	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> 	at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
> 	at org.apache.spark.sql.hive.client.IsolatedClientLoader.liftedTree1$1(IsolatedClientLoader.scala:183)
> 	at org.apache.spark.sql.hive.client.IsolatedClientLoader.<init>(IsolatedClientLoader.scala:179)
> 	at org.apache.spark.sql.hive.HiveContext.metadataHive$lzycompute(HiveContext.scala:264)
> 	at org.apache.spark.sql.hive.HiveContext.metadataHive(HiveContext.scala:186)
> 	at org.apache.spark.sql.hive.HiveContext.setConf(HiveContext.scala:393)
> 	at org.apache.spark.sql.SQLContext$$anonfun$5.apply(SQLContext.scala:229)
> 	at org.apache.spark.sql.SQLContext$$anonfun$5.apply(SQLContext.scala:228)
> 	at scala.collection.Iterator$class.foreach(Iterator.scala:727)
> 	at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
> 	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
> 	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
> 	at org.apache.spark.sql.SQLContext.<init>(SQLContext.scala:228)
> 	at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:72)
> 	at org.apache.spark.sql.hive.thriftserver.SparkSQLEnv$.init(SparkSQLEnv.scala:58)
> 	at org.apache.spark.sql.hive.thriftserver.HiveThriftServer2$.main(HiveThriftServer2.scala:77)
> 	at org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.main(HiveThriftServer2.scala)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:606)
> 	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
> 	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
> 	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
> 	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
> 	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)
> 	at sun.security.jgss.krb5.Krb5InitCredential.getInstance(Krb5InitCredential.java:147)
> 	at sun.security.jgss.krb5.Krb5MechFactory.getCredentialElement(Krb5MechFactory.java:121)
> 	at sun.security.jgss.krb5.Krb5MechFactory.getMechanismContext(Krb5MechFactory.java:187)
> 	at sun.security.jgss.GSSManagerImpl.getMechanismContext(GSSManagerImpl.java:223)
> 	at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:212)
> 	at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:179)
> 	at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:193)
> 	... 52 more
> 15/11/19 15:39:59 WARN metastore: Failed to connect to the MetaStore Server...
> 15/11/19 15:39:59 INFO metastore: Waiting 1 seconds before next connection attempt.
> {code}
> Note: If I don't configure the spark.sql.hive.metastore.* settings in spark-defaults.conf,
then, not surprisingly, there is a version incompatibility issue when I run DML, but I can start up the
Spark Thrift Server and pass Kerberos authentication.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org


Mime
View raw message