pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From da...@apache.org
Subject svn commit: r1735517 - in /pig/trunk: CHANGES.txt src/docs/src/documentation/content/xdocs/start.xml src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
Date Thu, 17 Mar 2016 22:49:56 GMT
Author: daijy
Date: Thu Mar 17 22:49:55 2016
New Revision: 1735517

URL: http://svn.apache.org/viewvc?rev=1735517&view=rev
PIG-4796: Authenticate with Kerberos using a keytab file


Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1735517&r1=1735516&r2=1735517&view=diff
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Mar 17 22:49:55 2016
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
+PIG-4796: Authenticate with Kerberos using a keytab file (nielsbasjes via daijy)
 PIG-4817: Bump HTTP Logparser to version 2.4 (nielsbasjes via daijy)
 PIG-4811: Upgrade groovy library to address MethodClosure vulnerability	(daijy)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/start.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/start.xml?rev=1735517&r1=1735516&r2=1735517&view=diff
--- pig/trunk/src/docs/src/documentation/content/xdocs/start.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/start.xml Thu Mar 17 22:49:55 2016
@@ -299,6 +299,60 @@ $ pig hdfs://nn.mydomain.com:9020/myscri
   <!-- ==================================================================== -->
+  <section id="kerberos">
+     <title>Running jobs on a Kerberos secured cluster</title>
+     <p>Kerberos is an authentication system that uses tickets with a limited validity.
+        As a consequence, running a Pig script on a Kerberos-secured Hadoop cluster limits the running time to at most
+        the remaining validity time of these Kerberos tickets. When doing really complex analytics this may become a
+        problem as the job may need to run for a longer time than these ticket times allow.</p>
+     <section id="kerberos-short">
+     <title>Short lived jobs</title>
+     <p>When running short jobs all you need to do is ensure that the user has been logged in to Kerberos via the
+        normal kinit method.<br/>
+        The Hadoop job will automatically pick up these credentials and the job will run fine.</p>
+     </section>
+     <section id="kerberos-long">
+     <title>Long lived jobs</title>
+     <p>A Kerberos keytab file is essentially a Kerberos-specific form of the password of a user.<br/>
+        It is possible to enable a Hadoop job to request new tickets when they expire by creating a keytab file and
+        making it part of the job that is running in the cluster.
+        This will extend the maximum job duration beyond the maximum renew time of the Kerberos tickets.</p>
+     <p>Usage:</p>
+     <ol>
+        <li>Create a keytab file for the required principal.<br/>
+           Using the ktutil tool you can create a keytab using roughly these commands:<br/>
+           <source>addent -password -p niels@EXAMPLE.NL -k 1 -e rc4-hmac
+addent -password -p niels@EXAMPLE.NL -k 1 -e aes256-cts
+wkt niels.keytab</source>
+        </li>
+        <li>Set the following properties (either via the .pigrc file or on the command
line via -P file)
+            <ul>
+            <li><em>java.security.krb5.conf</em><br/>
+                The path to the local krb5.conf file.<br/>
+                Usually this is "/etc/krb5.conf"</li>
+            <li><em>hadoop.security.krb5.principal</em><br/>
+                The principal you want to log in with.<br/>
+                Usually this would look like this "niels@EXAMPLE.NL"</li>
+            <li><em>hadoop.security.krb5.keytab</em><br/>
+                The path to the local keytab file that must be used to authenticate with.<br/>
+                Usually this would look like this "/home/niels/.krb/niels.keytab"</li>
+            </ul></li>
+     </ol>
+     <p><strong>NOTE:</strong> All paths in these variables are local to the client system starting the actual Pig script.
+        This can be run without any special access to the cluster nodes.</p>
+         <p>Overall you would create a file that looks like this (assume we call it niels.kerberos.properties):</p>
+         <source>java.security.krb5.conf=/etc/krb5.conf
+hadoop.security.krb5.principal=niels@EXAMPLE.NL
+hadoop.security.krb5.keytab=/home/niels/.krb/niels.keytab</source>
+         <p>and start your script like this:</p>
+         <source>pig -P niels.kerberos.properties script.pig</source>
+     </section>
+  </section>
+  <!-- ==================================================================== -->
    <section id="pl-statements">

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java?rev=1735517&r1=1735516&r2=1735517&view=diff
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java Thu
Mar 17 22:49:55 2016
@@ -37,6 +37,7 @@ import org.apache.pig.backend.BackendExc
 import org.apache.pig.backend.datastorage.DataStorage;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.backend.executionengine.ExecutionEngine;
+import org.apache.pig.backend.hadoop.HKerberos;
 import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.backend.hadoop.datastorage.HDataStorage;
 import org.apache.pig.backend.hadoop.executionengine.fetch.FetchLauncher;
@@ -215,6 +216,9 @@ public abstract class HExecutionEngine i
         // the properties
         Utils.recomputeProperties(jc, properties);
+        // Ensure we have been logged in using the kerberos keytab (if provided) before continuing.
+        HKerberos.tryKerberosKeytabLogin(jc);
         cluster = jc.get(MRConfiguration.JOB_TRACKER);
         nameNode = jc.get(FILE_SYSTEM_LOCATION);
         if (nameNode == null) {

View raw message