apex-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sas...@apache.org
Subject incubator-apex-site git commit: Adding apex-3.3 documentation
Date Tue, 22 Mar 2016 01:46:19 GMT
Repository: incubator-apex-site
Updated Branches:
  refs/heads/asf-site 9c8d8d89e -> c7ee6de7a


Adding apex-3.3 documentation


Project: http://git-wip-us.apache.org/repos/asf/incubator-apex-site/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-apex-site/commit/c7ee6de7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-apex-site/tree/c7ee6de7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-apex-site/diff/c7ee6de7

Branch: refs/heads/asf-site
Commit: c7ee6de7a9805f67649f7effc20a81d3b8979271
Parents: 9c8d8d8
Author: sashadt <sasha@datatorrent.com>
Authored: Mon Mar 21 18:46:11 2016 -0700
Committer: sashadt <sasha@datatorrent.com>
Committed: Mon Mar 21 18:46:11 2016 -0700

----------------------------------------------------------------------
 docs/apex-3.3/apex_development_setup/index.html |   7 +
 .../apex-3.3/application_development/index.html |   7 +
 docs/apex-3.3/application_packages/index.html   |   7 +
 docs/apex-3.3/autometrics/index.html            |   7 +
 docs/apex-3.3/compatibility/index.html          |  11 +-
 docs/apex-3.3/dtcli/index.html                  |  11 +-
 docs/apex-3.3/images/security/image00.png       | Bin 0 -> 31624 bytes
 docs/apex-3.3/images/security/image02.png       | Bin 0 -> 11594 bytes
 docs/apex-3.3/index.html                        |   9 +-
 docs/apex-3.3/mkdocs/search_index.json          |  90 ++++++
 docs/apex-3.3/operator_development/index.html   |   7 +
 docs/apex-3.3/search.html                       |   7 +
 docs/apex-3.3/security/index.html               | 322 +++++++++++++++++++
 docs/apex-3.3/sitemap.xml                       |  22 +-
 14 files changed, 494 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/apex_development_setup/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/apex_development_setup/index.html b/docs/apex-3.3/apex_development_setup/index.html
index c3029c0..5033893 100644
--- a/docs/apex-3.3/apex_development_setup/index.html
+++ b/docs/apex-3.3/apex_development_setup/index.html
@@ -132,6 +132,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/application_development/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/application_development/index.html b/docs/apex-3.3/application_development/index.html
index d0bc30b..b7b3faa 100644
--- a/docs/apex-3.3/application_development/index.html
+++ b/docs/apex-3.3/application_development/index.html
@@ -198,6 +198,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/application_packages/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/application_packages/index.html b/docs/apex-3.3/application_packages/index.html
index 4ca9434..cd0eea7 100644
--- a/docs/apex-3.3/application_packages/index.html
+++ b/docs/apex-3.3/application_packages/index.html
@@ -144,6 +144,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/autometrics/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/autometrics/index.html b/docs/apex-3.3/autometrics/index.html
index d5ffea8..d52c369 100644
--- a/docs/apex-3.3/autometrics/index.html
+++ b/docs/apex-3.3/autometrics/index.html
@@ -143,6 +143,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/compatibility/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/compatibility/index.html b/docs/apex-3.3/compatibility/index.html
index fc88b80..42ded9a 100644
--- a/docs/apex-3.3/compatibility/index.html
+++ b/docs/apex-3.3/compatibility/index.html
@@ -117,6 +117,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           
@@ -244,7 +251,7 @@ The <a href="https://github.com/siom79/japicmp">japicmp Maven plugin</a> is used
     <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
       
       
-        <a href="../dtcli/" class="btn btn-neutral" title="dtCli"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+        <a href="../security/" class="btn btn-neutral" title="Security"><span class="icon icon-circle-arrow-left"></span> Previous</a>
       
     </div>
   
@@ -270,7 +277,7 @@ The <a href="https://github.com/siom79/japicmp">japicmp Maven plugin</a> is used
     <span class="rst-current-version" data-toggle="rst-current-version">
       
       
-        <span><a href="../dtcli/" style="color: #fcfcfc;">&laquo; Previous</a></span>
+        <span><a href="../security/" style="color: #fcfcfc;">&laquo; Previous</a></span>
       
       
     </span>

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/dtcli/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/dtcli/index.html b/docs/apex-3.3/dtcli/index.html
index bb63fab..1aa395e 100644
--- a/docs/apex-3.3/dtcli/index.html
+++ b/docs/apex-3.3/dtcli/index.html
@@ -128,6 +128,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           
@@ -426,7 +433,7 @@ they must be part of the jar files that were deployed at application launch time
   
     <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
       
-        <a href="../compatibility/" class="btn btn-neutral float-right" title="Compatibility">Next <span class="icon icon-circle-arrow-right"></span></a>
+        <a href="../security/" class="btn btn-neutral float-right" title="Security">Next <span class="icon icon-circle-arrow-right"></span></a>
       
       
         <a href="../autometrics/" class="btn btn-neutral" title="AutoMetric API"><span class="icon icon-circle-arrow-left"></span> Previous</a>
@@ -458,7 +465,7 @@ they must be part of the jar files that were deployed at application launch time
         <span><a href="../autometrics/" style="color: #fcfcfc;">&laquo; Previous</a></span>
       
       
-        <span style="margin-left: 15px"><a href="../compatibility/" style="color: #fcfcfc">Next &raquo;</a></span>
+        <span style="margin-left: 15px"><a href="../security/" style="color: #fcfcfc">Next &raquo;</a></span>
       
     </span>
 </div>

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/images/security/image00.png
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/images/security/image00.png b/docs/apex-3.3/images/security/image00.png
new file mode 100644
index 0000000..26088b0
Binary files /dev/null and b/docs/apex-3.3/images/security/image00.png differ

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/images/security/image02.png
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/images/security/image02.png b/docs/apex-3.3/images/security/image02.png
new file mode 100644
index 0000000..65d30a8
Binary files /dev/null and b/docs/apex-3.3/images/security/image02.png differ

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/index.html b/docs/apex-3.3/index.html
index 766851f..9d9b251 100644
--- a/docs/apex-3.3/index.html
+++ b/docs/apex-3.3/index.html
@@ -124,6 +124,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           
@@ -225,5 +232,5 @@
 
 <!--
 MkDocs version : 0.15.3
-Build Date UTC : 2016-03-19 02:17:55.330407
+Build Date UTC : 2016-03-22 01:44:26.357028
 -->

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/mkdocs/search_index.json
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/mkdocs/search_index.json b/docs/apex-3.3/mkdocs/search_index.json
index cc08c8a..0f2b244 100644
--- a/docs/apex-3.3/mkdocs/search_index.json
+++ b/docs/apex-3.3/mkdocs/search_index.json
@@ -831,6 +831,96 @@
             "title": "Examples"
         }, 
         {
+            "location": "/security/", 
+            "text": "Security\n\n\nApplications built on Apex run as native YARN applications on Hadoop. The security framework and apparatus in Hadoop apply to the applications. The default security mechanism in Hadoop is Kerberos.\n\n\nKerberos Authentication\n\n\nKerberos is a ticket based authentication system that provides authentication in a distributed environment where authentication is needed between multiple users, hosts and services. It is the de-facto authentication mechanism supported in Hadoop. To use Kerberos authentication, the Hadoop installation must first be configured for secure mode with Kerberos. Please refer to the administration guide of your Hadoop distribution on how to do that. Once Hadoop is configured, there is some configuration needed on Apex side as well.\n\n\nConfiguring security\n\n\nThere is Hadoop configuration and CLI configuration. Hadoop configuration may be optional.\n\n\nHadoop Configuration\n\n\nAn Apex application uses delegation tokens to 
 authenticte with the ResourceManager (YARN) and NameNode (HDFS) and these tokens are issued by those servers respectively. Since the application is long-running,\nthe tokens should be valid for the lifetime of the application. Hadoop has a configuration setting for the maximum lifetime of the tokens and they should be set to cover the lifetime of the application. There are separate settings for ResourceManager and NameNode delegation\ntokens.\n\n\nThe ResourceManager delegation token max lifetime is specified in \nyarn-site.xml\n and can be specified as follows for example for a lifetime of 1 year\n\n\nproperty\n\n  \nname\nyarn.resourcemanager.delegation.token.max-lifetime\n/name\n\n  \nvalue\n31536000000\n/value\n\n\n/property\n\n\n\n\n\nThe NameNode delegation token max lifetime is specified in\nhdfs-site.xml and can be specified as follows for example for a lifetime of 1 year\n\n\nproperty\n\n   \nname\ndfs.namenode.delegation.token.max-lifetime\n/name\n\n   \nvalue\n31536000000
 \n/value\n\n \n/property\n\n\n\n\n\nCLI Configuration\n\n\nThe Apex command line interface is used to launch\napplications along with performing various other operations and administrative tasks on the applications. \u00a0When Kerberos security is enabled in Hadoop, a Kerberos ticket granting ticket (TGT) or the Kerberos credentials of the user are needed by the CLI program \ndtcli\n to authenticate with Hadoop for any operation. Kerberos credentials are composed of a principal and either a \nkeytab\n or a password. For security and operational reasons only keytabs are supported in Hadoop and by extension in Apex platform. When user credentials are specified, all operations including launching\napplication are performed as that user.\n\n\nUsing kinit\n\n\nA Keberos ticket granting ticket (TGT) can be obtained by using the Kerberos command \nkinit\n. Detailed documentation for the command can be found online or in man pages. An sample usage of this command is\n\n\nkinit -k -t path-to
 keytab-file kerberos-principal\n\n\n\nIf this command is successful, the TGT is obtained, cached and available for other programs. The CLI program \ndtcli\n can then be started to launch applications and perform other operations.\n\n\nUsing Kerberos credentials\n\n\nThe CLI program \ndtcli\n can also use the Kerberos credentials directly without requiring a TGT to be obtained separately. This can be useful in batch mode where \ndtcli\n is not launched manually and also in scenarios where running another program like \nkinit\n is not feasible.\n\n\nThe credentials can be specified in the \ndt-site.xml\n configuration file. If only a single user is launching applications, the global \ndt-site.xml\n configuration file in the installation folder can be used. In a multi-user environment the users can use the \ndt-site.xml\n file in their\nhome directory. The location of this file will be \n$HOME/.dt/dt-site.xml\n. If this file does not exist, the user can create a new one.\n\n\nThe snipp
 et below shows the how the credentials can be specified in the configuration file as properties.\n\n\nproperty\n\n        \nname\ndt.authentication.principal\n/name\n\n        \nvalue\nkerberos-principal-of-user\n/value\n\n\n/property\n\n\nproperty\n\n        \nname\ndt.authentication.keytab\n/name\n\n        \nvalue\nabsolute-path-to-keytab-file\n/value\n\n\n/property\n\n\n\n\n\nThe property \ndt.authentication.principal\n specifies the Kerberos user principal and \ndt.authentication.keytab\n specifies the absolute path to the keytab file for the user.\n\n\nThe subsequent sections talk about how security works in Apex. This information is not needed by users but is intended for the inquisitive techical audience who want to know how security works.\n\n\nSecurity architecture\n\n\nIn this section we will see how security works for applications built on Apex. We will look at the different methodologies involved in running the applications and in each case we will look into the differe
 nt components that are involved. We will go into the architecture of these components and look at the different security mechanisms that are in play.\n\n\nApplication Launch\n\n\nTo launch applications in Apache Apex the command line client dtcli can be used. The application artifacts such as binaries and properties are supplied as an application package. The client, during the various steps involved to launch the application needs to communicate with both the Resource Manager and the Name Node. The Resource Manager communication involves the client asking for new resources to run the application master and start the application launch process. The steps along with sample Java code are described in Writing YARN Applications. The Name Node communication includes the application artifacts being copied to HDFS so that they are available across the cluster for launching the different application containers.\n\n\nIn secure mode the communications with both Resource Manager and Name Node 
 requires authentication and the mechanism is Kerberos. Below is an illustration showing this.\n\n\n        \n\n\nThe client dtcli supports Kerberos authentication and will automatically enable it in a secure environment. To authenticate, some Kerberos configuration namely the Kerberos credentials, are needed by the client. There are two parameters, the Kerberos principal and keytab to use for the client. These can be specified in the dt-site.xml configuration file. The properties are shown below\n\n\n    \nproperty\n\n            \nname\ndt.authentication.principal\n/name\n\n            \nvalue\nkerberos-principal-of-user\n/value\n\n    \n/property\n\n    \nproperty\n\n            \nname\ndt.authentication.keytab\n/name\n\n            \nvalue\nabsolute-path-to-keytab-file\n/value\n\n    \n/property\n\n\n\n\nRefer to document Operation and Installation Guide section Multi Tenancy and Security subsection CLI Configuration in the documentation for more information. The document can als
 o be accessed here client configuration\n\n\nThere is another important functionality that is performed by the client and that is to retrieve what are called delegation tokens from the Resource Manager and Name Node to seed the application master container that is to be launched. This is detailed in the next section. \n\n\nRuntime Security\n\n\nWhen the application is completely up and running, there are different components of the application running as separate processes possibly on different nodes in the cluster as it is a distributed application. These components interactwould be interacting with each other and the Hadoop services. In secure mode, all these interactions have to be authenticated before they can be successfully processed. The interactions are illustrated below in a diagram to give a complete overview. Each of them is explained in subsequent sections.\n\n\n\n\nSTRAM and Hadoop\n\n\nEvery Apache Apex application has a master process akin to any YARN application. In 
 our case it is called STRAM (Streaming Application Master). It is a master process that runs in its own container and manages the different distributed components of the application. Among other tasks it requests Resource Manager for new resources as they are needed and gives back resources that are no longer needed. STRAM also needs to communicate with Name Node from time-to-time to access the persistent HDFS file system. \n\n\nIn secure mode STRAM has to authenticate with both Resource Manager and Name Node before it can send any requests and this is achieved using Delegation Tokens. Since STRAM runs as a managed application master it runs in a Hadoop container. This container could have been allocated on any node based on what resources were available. Since there is no fixed node where STRAM runs it does not have Kerberos credentials and hence unlike the launch client dtcli it cannot authenticate with Hadoop services Resource Manager and Name Node using Kerberos. Instead, Delega
 tion Tokens are used for authentication.\n\n\nDelegation Tokens\n\n\nDelegation tokens are tokens that are dynamically issued by the source and clients use them to authenticate with the source. The source stores the delegation tokens it has issued in a cache and checks the delegation token sent by a client against the cache. If a match is found, the authentication is successful else it fails. This is the second mode of authentication in secure Hadoop after Kerberos. More details can be found in the Hadoop security design document. In this case the delegation tokens are issued by Resource Manager and Name Node. STRAM useswould use these tokens to authenticate with them. But how does it get them in the first place? This is where the launch client dtcli comes in. \n\n\nThe client dtcli, since it possesses Kerberos credentials as explained in the Application Launch section, is able to authenticate with Resource Manager and Name Node using Kerberos. It then requests for delegation tokens
  over the Kerberos authenticated connection. The servers return the delegation tokens in the response payload. The client in requesting the resource manager for the start of the application master container for STRAM seeds it with these tokens so that when STRAM starts it has these tokens. It can then use these tokens to authenticate with the Hadoop services.\n\n\nStreaming Container\n\n\nA streaming container is a process that runs a part of the application business logic. It is a container deployed on a node in the cluster. The part of business logic is implemented in what we call an operator. Multiple operators connected together make up the complete application and hence there are multiple streaming containers in an application. The streaming containers have different types of communications going on as illustrated in the diagram above. They are described below.\n\n\nSTRAM Delegation Token\n\n\nThe streaming containers periodically communicate with the application master STRAM. 
 In the communication they send what are called heartbeats with information such as statistics and receive commands from STRAM such as deployment or un-deployment of operators, changing properties of operators etc. In secure mode, this communication cannot just occur without any authentication. To facilitate this authentication special tokens called STRAM Delegation Tokens are used. These tokens are created and managed by STRAM. When a new streaming container is being started, since STRAM is the one negotiating resources from Resource Manager for the container and requesting to start the container, it seeds the container with the STRAM delegation token necessary to communicate with it. Thus, a streaming container has the STRAM delegation token to successfully authenticate and communicate with STRAM.\n\n\nBuffer Server Token\n\n\nAs mentioned earlier an operator implements a piece of the business logic of the application and multiple operators together complete the application. In cre
 ating the application the operators are assembled together in a direct acyclic graph, a pipeline, with output of operators becoming the input for other operators. At runtime the stream containers hosting the operators are connected to each other and sending data to each other. In secure mode these connections should be authenticated too, more importantly than others, as they are involved in transferring application data.\n\n\nWhen operators are running there will be effective processing rate differences between them due to intrinsic reasons such as operator logic or external reasons such as different resource availability of CPU, memory, network bandwidth etc. as the operators are running in different containers. To maximize performance and utilization the data flow is handled asynchronous to the regular operator function and a buffer is used to intermediately store the data that is being produced by the operator. This buffered data is served by a buffer server over the network conn
 ection to the downstream streaming container containing the operator that is supposed to receive the data from this operator. This connection is secured by a token called the buffer server token. These tokens are also generated and seeded by STRAM when the streaming containers are deployed and started and it uses different tokens for different buffer servers to have better security.\n\n\nNameNode Delegation Token\n\n\nLike STRAM, streaming containers also need to communicate with NameNode to use HDFS persistence for reasons such as saving the state of the operators. In secure mode they also use NameNode delegation tokens for authentication. These tokens are also seeded by STRAM for the streaming containers.\n\n\nConclusion\n\n\nWe looked at the different security requirements for distributed applications when they run in a secure Hadoop environment and looked at how Apex solves this.", 
+            "title": "Security"
+        }, 
+        {
+            "location": "/security/#security", 
+            "text": "Applications built on Apex run as native YARN applications on Hadoop. The security framework and apparatus in Hadoop apply to the applications. The default security mechanism in Hadoop is Kerberos.", 
+            "title": "Security"
+        }, 
+        {
+            "location": "/security/#kerberos-authentication", 
+            "text": "Kerberos is a ticket based authentication system that provides authentication in a distributed environment where authentication is needed between multiple users, hosts and services. It is the de-facto authentication mechanism supported in Hadoop. To use Kerberos authentication, the Hadoop installation must first be configured for secure mode with Kerberos. Please refer to the administration guide of your Hadoop distribution on how to do that. Once Hadoop is configured, there is some configuration needed on Apex side as well.", 
+            "title": "Kerberos Authentication"
+        }, 
+        {
+            "location": "/security/#configuring-security", 
+            "text": "There is Hadoop configuration and CLI configuration. Hadoop configuration may be optional.", 
+            "title": "Configuring security"
+        }, 
+        {
+            "location": "/security/#hadoop-configuration", 
+            "text": "An Apex application uses delegation tokens to authenticte with the ResourceManager (YARN) and NameNode (HDFS) and these tokens are issued by those servers respectively. Since the application is long-running,\nthe tokens should be valid for the lifetime of the application. Hadoop has a configuration setting for the maximum lifetime of the tokens and they should be set to cover the lifetime of the application. There are separate settings for ResourceManager and NameNode delegation\ntokens.  The ResourceManager delegation token max lifetime is specified in  yarn-site.xml  and can be specified as follows for example for a lifetime of 1 year  property \n   name yarn.resourcemanager.delegation.token.max-lifetime /name \n   value 31536000000 /value  /property   The NameNode delegation token max lifetime is specified in\nhdfs-site.xml and can be specified as follows for example for a lifetime of 1 year  property \n    name dfs.namenode.delegation.token.max-lifetime /nam
 e \n    value 31536000000 /value \n  /property", 
+            "title": "Hadoop Configuration"
+        }, 
+        {
+            "location": "/security/#cli-configuration", 
+            "text": "The Apex command line interface is used to launch\napplications along with performing various other operations and administrative tasks on the applications. \u00a0When Kerberos security is enabled in Hadoop, a Kerberos ticket granting ticket (TGT) or the Kerberos credentials of the user are needed by the CLI program  dtcli  to authenticate with Hadoop for any operation. Kerberos credentials are composed of a principal and either a  keytab  or a password. For security and operational reasons only keytabs are supported in Hadoop and by extension in Apex platform. When user credentials are specified, all operations including launching\napplication are performed as that user.", 
+            "title": "CLI Configuration"
+        }, 
+        {
+            "location": "/security/#using-kinit", 
+            "text": "A Keberos ticket granting ticket (TGT) can be obtained by using the Kerberos command  kinit . Detailed documentation for the command can be found online or in man pages. An sample usage of this command is  kinit -k -t path-tokeytab-file kerberos-principal  If this command is successful, the TGT is obtained, cached and available for other programs. The CLI program  dtcli  can then be started to launch applications and perform other operations.", 
+            "title": "Using kinit"
+        }, 
+        {
+            "location": "/security/#using-kerberos-credentials", 
+            "text": "The CLI program  dtcli  can also use the Kerberos credentials directly without requiring a TGT to be obtained separately. This can be useful in batch mode where  dtcli  is not launched manually and also in scenarios where running another program like  kinit  is not feasible.  The credentials can be specified in the  dt-site.xml  configuration file. If only a single user is launching applications, the global  dt-site.xml  configuration file in the installation folder can be used. In a multi-user environment the users can use the  dt-site.xml  file in their\nhome directory. The location of this file will be  $HOME/.dt/dt-site.xml . If this file does not exist, the user can create a new one.  The snippet below shows the how the credentials can be specified in the configuration file as properties.  property \n         name dt.authentication.principal /name \n         value kerberos-principal-of-user /value  /property  property \n         name dt.authentication.keyta
 b /name \n         value absolute-path-to-keytab-file /value  /property   The property  dt.authentication.principal  specifies the Kerberos user principal and  dt.authentication.keytab  specifies the absolute path to the keytab file for the user.  The subsequent sections talk about how security works in Apex. This information is not needed by users but is intended for the inquisitive techical audience who want to know how security works.", 
+            "title": "Using Kerberos credentials"
+        }, 
+        {
+            "location": "/security/#security-architecture", 
+            "text": "In this section we will see how security works for applications built on Apex. We will look at the different methodologies involved in running the applications and in each case we will look into the different components that are involved. We will go into the architecture of these components and look at the different security mechanisms that are in play.", 
+            "title": "Security architecture"
+        }, 
+        {
+            "location": "/security/#application-launch", 
+            "text": "To launch applications in Apache Apex the command line client dtcli can be used. The application artifacts such as binaries and properties are supplied as an application package. The client, during the various steps involved to launch the application needs to communicate with both the Resource Manager and the Name Node. The Resource Manager communication involves the client asking for new resources to run the application master and start the application launch process. The steps along with sample Java code are described in Writing YARN Applications. The Name Node communication includes the application artifacts being copied to HDFS so that they are available across the cluster for launching the different application containers.  In secure mode the communications with both Resource Manager and Name Node requires authentication and the mechanism is Kerberos. Below is an illustration showing this.            The client dtcli supports Kerberos authentication and wil
 l automatically enable it in a secure environment. To authenticate, some Kerberos configuration namely the Kerberos credentials, are needed by the client. There are two parameters, the Kerberos principal and keytab to use for the client. These can be specified in the dt-site.xml configuration file. The properties are shown below       property \n             name dt.authentication.principal /name \n             value kerberos-principal-of-user /value \n     /property \n     property \n             name dt.authentication.keytab /name \n             value absolute-path-to-keytab-file /value \n     /property   Refer to document Operation and Installation Guide section Multi Tenancy and Security subsection CLI Configuration in the documentation for more information. The document can also be accessed here client configuration  There is another important functionality that is performed by the client and that is to retrieve what are called delegation tokens from the Resource Manager and Na
 me Node to seed the application master container that is to be launched. This is detailed in the next section.", 
+            "title": "Application Launch"
+        }, 
+        {
+            "location": "/security/#runtime-security", 
+            "text": "When the application is completely up and running, there are different components of the application running as separate processes possibly on different nodes in the cluster as it is a distributed application. These components interactwould be interacting with each other and the Hadoop services. In secure mode, all these interactions have to be authenticated before they can be successfully processed. The interactions are illustrated below in a diagram to give a complete overview. Each of them is explained in subsequent sections.", 
+            "title": "Runtime Security"
+        }, 
+        {
+            "location": "/security/#stram-and-hadoop", 
+            "text": "Every Apache Apex application has a master process akin to any YARN application. In our case it is called STRAM (Streaming Application Master). It is a master process that runs in its own container and manages the different distributed components of the application. Among other tasks it requests Resource Manager for new resources as they are needed and gives back resources that are no longer needed. STRAM also needs to communicate with Name Node from time-to-time to access the persistent HDFS file system.   In secure mode STRAM has to authenticate with both Resource Manager and Name Node before it can send any requests and this is achieved using Delegation Tokens. Since STRAM runs as a managed application master it runs in a Hadoop container. This container could have been allocated on any node based on what resources were available. Since there is no fixed node where STRAM runs it does not have Kerberos credentials and hence unlike the launch client dtcli it ca
 nnot authenticate with Hadoop services Resource Manager and Name Node using Kerberos. Instead, Delegation Tokens are used for authentication.", 
+            "title": "STRAM and Hadoop"
+        }, 
+        {
+            "location": "/security/#delegation-tokens", 
+            "text": "Delegation tokens are tokens that are dynamically issued by the source and clients use them to authenticate with the source. The source stores the delegation tokens it has issued in a cache and checks the delegation token sent by a client against the cache. If a match is found, the authentication is successful else it fails. This is the second mode of authentication in secure Hadoop after Kerberos. More details can be found in the Hadoop security design document. In this case the delegation tokens are issued by Resource Manager and Name Node. STRAM uses these tokens to authenticate with them. But how does it get them in the first place? This is where the launch client dtcli comes in.   The client dtcli, since it possesses Kerberos credentials as explained in the Application Launch section, is able to authenticate with Resource Manager and Name Node using Kerberos. It then requests for delegation tokens over the Kerberos authenticated connection. The ser
 vers return the delegation tokens in the response payload. The client in requesting the resource manager for the start of the application master container for STRAM seeds it with these tokens so that when STRAM starts it has these tokens. It can then use these tokens to authenticate with the Hadoop services.", 
+            "title": "Delegation Tokens"
+        }, 
+        {
+            "location": "/security/#streaming-container", 
+            "text": "A streaming container is a process that runs a part of the application business logic. It is a container deployed on a node in the cluster. The part of business logic is implemented in what we call an operator. Multiple operators connected together make up the complete application and hence there are multiple streaming containers in an application. The streaming containers have different types of communications going on as illustrated in the diagram above. They are described below.", 
+            "title": "Streaming Container"
+        }, 
+        {
+            "location": "/security/#stram-delegation-token", 
+            "text": "The streaming containers periodically communicate with the application master STRAM. In the communication they send what are called heartbeats with information such as statistics and receive commands from STRAM such as deployment or un-deployment of operators, changing properties of operators etc. In secure mode, this communication cannot just occur without any authentication. To facilitate this authentication special tokens called STRAM Delegation Tokens are used. These tokens are created and managed by STRAM. When a new streaming container is being started, since STRAM is the one negotiating resources from Resource Manager for the container and requesting to start the container, it seeds the container with the STRAM delegation token necessary to communicate with it. Thus, a streaming container has the STRAM delegation token to successfully authenticate and communicate with STRAM.", 
+            "title": "STRAM Delegation Token"
+        }, 
+        {
+            "location": "/security/#buffer-server-token", 
+            "text": "As mentioned earlier an operator implements a piece of the business logic of the application and multiple operators together complete the application. In creating the application the operators are assembled together in a directed acyclic graph, a pipeline, with output of operators becoming the input for other operators. At runtime the stream containers hosting the operators are connected to each other and sending data to each other. In secure mode these connections should be authenticated too, more importantly than others, as they are involved in transferring application data.  When operators are running there will be effective processing rate differences between them due to intrinsic reasons such as operator logic or external reasons such as different resource availability of CPU, memory, network bandwidth etc. as the operators are running in different containers. To maximize performance and utilization the data flow is handled asynchronous to the regular operato
 r function and a buffer is used to intermediately store the data that is being produced by the operator. This buffered data is served by a buffer server over the network connection to the downstream streaming container containing the operator that is supposed to receive the data from this operator. This connection is secured by a token called the buffer server token. These tokens are also generated and seeded by STRAM when the streaming containers are deployed and started and it uses different tokens for different buffer servers to have better security.", 
+            "title": "Buffer Server Token"
+        }, 
+        {
+            "location": "/security/#namenode-delegation-token", 
+            "text": "Like STRAM, streaming containers also need to communicate with NameNode to use HDFS persistence for reasons such as saving the state of the operators. In secure mode they also use NameNode delegation tokens for authentication. These tokens are also seeded by STRAM for the streaming containers.", 
+            "title": "NameNode Delegation Token"
+        }, 
+        {
+            "location": "/security/#conclusion", 
+            "text": "We looked at the different security requirements for distributed applications when they run in a secure Hadoop environment and looked at how Apex solves this.", 
+            "title": "Conclusion"
+        }, 
+        {
             "location": "/compatibility/", 
             "text": "Apache Apex Compatibility\n\n\nPurpose\n\n\nThis document captures the compatibility goals of the Apache Apex project. The different types of compatibility between Apex releases that affect contributors, downstream projects, and end-users are enumerated. For each type of compatibility we:\n\n\n\n\ndescribe the impact on downstream projects or end-users\n\n\nwhere applicable, call out the policy adopted when incompatible changes are permitted.\n\n\n\n\nApache Apex follows \nsemantic versioning\n. Depending on the compatibility type, there may be different tools or mechanisms to ensure compatibility, for example by comparing artifacts during the build process.\n\n\nThe type of change will inform the required target version number. Given a version number MAJOR.MINOR.PATCH, increment the:\n\n\n\n\nMAJOR version when you make incompatible API changes,\n\n\nMINOR version when you add functionality in a backward-compatible manner, and\n\n\nPATCH version when you make b
 ackward-compatible bug fixes.\n\n\n\n\nAdditional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.\n\n\nThe overall goal is to avoid backward incompatible changes and major release upgrades. Accordingly we attempt to release new features with minor versions that are incremental to the prior release and offer our users a frictionless upgrade path. When planning contributions, please consider compatibility and release road map upfront. Specifically, certain changes that conflict with the versioning may need to be documented in JIRA and deferred until a future major release. \n\n\nCompatibility types\n\n\nJava API\n\n\nPublic API compatibility is required to ensure end-user programs and downstream projects continue to work without modification.\nThe public API consists of:\n\n\n\n\napex-core: all interfaces and classes in \napi\n and \ncommon\n modules\n\n\napex-malhar: all interfaces and classes in all modules except \ndemos\n, \ns
 amples\n, \nbenchmark\n \n\n\n\n\nInterfaces and classes that are part of the public API and are annotated with \ninterface stability\n are treated according to the rules defined by the annotation.  \n\n\nPolicy\n\n\nChanges to the public API must follow semantic versioning. \nPublic APIs must be deprecated for at least one minor release prior to their removal in a major release.\nThe \njapicmp Maven plugin\n is used to enforce compatibility as part of the Travis pre-commit builds.\n\n\nSemantic compatibility\n\n\nThe behavior of APIs needs to remain consistent over versions, though changes for correctness may result in changes in behavior. Tests and javadocs specify the behavior. Over time, test suites should be expanded to verify compliance with the specification, effectively creating a formal specification for the subset of behaviors that can be easily tested.\n\n\nPolicy\n\n\nThe behavior of existing API cannot be modified as it would break existing user code. There are exceptio
 nal circumstances that may justify such changes, in which cases they should be discussed on the mailing list before implementation. Examples are bug fixes related to security issues, data corruption/consistency or to correct an unintended change from previous release that violated semantic compatibility. Such changes should be accompanied by test coverage for the exact behavior.\n\n\nREST API\n\n\nREST API compatibility corresponds to both the URLs and request/response content over the wire. REST APIs are specifically meant for stable use by clients across releases, even major releases. \n\n\nPolicy\n\n\nThe REST API is separately versioned. This is to allow for co-existence of old and new API should there be a need for backward incompatible changes in the future.\n\n\nCommand Line Interface (CLI)\n\n\nThe CLI may be used either directly via the system shell or via shell scripts. Changing the path, removing or renaming command line options, the order of arguments, or the command ret
 urn code and output break compatibility and may adversely affect users.\n\n\nPolicy\n\n\nCLI commands are to be deprecated (warning when used) in a prior minor release before they are removed or incompatibly modified in a subsequent major release.\n\n\nConfiguration Files\n\n\nConfiguration files are used for engine or application settings. Changes to keys and default values directly affect users and are hard to diagnose (compared to a compile error, for example).\n\n\nPolicy\n\n\nName, location, format, keys of configuration files should be deprecated in a prior minor release and can only be changed in major release. Best effort should be made to support the deprecated behavior for one more major release (not guaranteed). It is also desirable to provide the user with a migration tool.\n\n\nInternal Wire compatibility\n\n\nApex containers internally use RPC communication and netlet for the data flow. The protocols are private and user components are not exposed to it. Apex is a YARN
  application and automatically deployed. There is currently no situation where containers of different Apex engine versions need to be interoperable. Should such a scenario become relevant in the future, wire compatibility needs to be specified.\n\n\nPolicy\n\n\nN/A\n\n\nInternal File formats\n\n\nApex engine stores data in the file system for recovery and the data is typically obtained from serialization (from Kryo, Java etc.). Changes to internal classes may affect the ability to relaunch an application with upgraded engine code from previous state. This is currently not supported. In the future, the serialization mechanism should guarantee backward compatibility.\n\n\nPolicy\n\n\nCurrently no compatibility guarantee. User to cold-restart application on engine upgrade.\n\n\nJava Classpath\n\n\nApex applications should not bundle Hadoop dependencies or Apex engine dependencies but use the dependencies provided in the target environment to avoid conflicts. The Apex application arche
 type can be used to generate a compliant project.  \n\n\nPolicy\n\n\nApex engine dependencies can change as per semantic versioning. Following above guidelines automatically maintains the backward compatibility based on semantic versioning of Apex.\n\n\nMaven Build Artifacts\n\n\nDownstream projects reference the Apex engine dependencies and Malhar operator libraries for application development etc. Changes to the packaging (which classes are in which jar), the groupId, artifactId and which artifacts are deployed to Maven central impact upgrades.\n\n\nPolicy\n\n\nThe artifacts that contain the classes that form the public API as specified above cannot change in patch releases and should stay compatible within a major release. Patch releases can change dependencies, but only at the patch level and following semantic versioning.\n\n\nHardware/Software Requirements\n\n\nApex depends on Apache Hadoop. The community intends to support all major Hadoop distros and current versions. Apex c
 urrently supports Hadoop 2.2.0 and higher and Java 7 and higher. Apex is written in Java and has been tested on Linux based Hadoop clusters. There are no additional restrictions on the hardware architecture.  \n\n\nTo keep up with the latest advances in hardware, operating systems, JVMs, Hadoop and other software, new Apex releases may require higher versions. Upgrading Apex may require upgrading other dependent software components.\n\n\nPolicy\n\n\nThe JVM and Hadoop minimum version requirements are not expected to change outside major releases.", 
             "title": "Compatibility"

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/operator_development/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/operator_development/index.html b/docs/apex-3.3/operator_development/index.html
index cae1749..710d490 100644
--- a/docs/apex-3.3/operator_development/index.html
+++ b/docs/apex-3.3/operator_development/index.html
@@ -176,6 +176,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/search.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/search.html b/docs/apex-3.3/search.html
index 6fa6c09..fda658a 100644
--- a/docs/apex-3.3/search.html
+++ b/docs/apex-3.3/search.html
@@ -113,6 +113,13 @@
     </li>
 
         
+            
+    <li class="toctree-l1 ">
+        <a class="" href="security/">Security</a>
+        
+    </li>
+
+        
     </ul>
 <li>
           

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/security/index.html
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/security/index.html b/docs/apex-3.3/security/index.html
new file mode 100644
index 0000000..15b2b94
--- /dev/null
+++ b/docs/apex-3.3/security/index.html
@@ -0,0 +1,322 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>Security - Apache Apex Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../favicon.ico">
+  
+
+  
+  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "Security";
+    var mkdocs_page_input_path = "security.md";
+    var mkdocs_page_url = "/security/";
+  </script>
+  
+  <script src="../js/jquery-2.1.1.min.js"></script>
+  <script src="../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../js/highlight.pack.js"></script>
+  <script src="../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href=".." class="icon icon-home"> Apache Apex Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="..">Apache Apex</a>
+        
+    </li>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Development</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../apex_development_setup/">Development Setup</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../application_development/">Applications</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../application_packages/">Packages</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../operator_development/">Operators</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../autometrics/">AutoMetric API</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operations</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../dtcli/">dtCli</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">Security</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a href="#security">Security</a></li>
+                
+                    <li><a class="toctree-l4" href="#kerberos-authentication">Kerberos Authentication</a></li>
+                
+                    <li><a class="toctree-l4" href="#configuring-security">Configuring security</a></li>
+                
+                    <li><a class="toctree-l4" href="#security-architecture">Security architecture</a></li>
+                
+                    <li><a class="toctree-l4" href="#conclusion">Conclusion</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../compatibility/">Compatibility</a>
+        
+    </li>
+<li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="..">Apache Apex Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operations &raquo;</li>
+        
+      
+    
+    <li>Security</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="security">Security</h1>
+<p>Applications built on Apex run as native YARN applications on Hadoop. The security framework and apparatus in Hadoop apply to the applications. The default security mechanism in Hadoop is Kerberos.</p>
+<h2 id="kerberos-authentication">Kerberos Authentication</h2>
+<p>Kerberos is a ticket based authentication system that provides authentication in a distributed environment where authentication is needed between multiple users, hosts and services. It is the de-facto authentication mechanism supported in Hadoop. To use Kerberos authentication, the Hadoop installation must first be configured for secure mode with Kerberos. Please refer to the administration guide of your Hadoop distribution on how to do that. Once Hadoop is configured, there is some configuration needed on Apex side as well.</p>
+<h2 id="configuring-security">Configuring security</h2>
+<p>There is Hadoop configuration and CLI configuration. Hadoop configuration may be optional.</p>
+<h3 id="hadoop-configuration">Hadoop Configuration</h3>
+<p>An Apex application uses delegation tokens to authenticate with the ResourceManager (YARN) and NameNode (HDFS) and these tokens are issued by those servers respectively. Since the application is long-running,
+the tokens should be valid for the lifetime of the application. Hadoop has a configuration setting for the maximum lifetime of the tokens and they should be set to cover the lifetime of the application. There are separate settings for ResourceManager and NameNode delegation
+tokens.</p>
+<p>The ResourceManager delegation token max lifetime is specified in <code>yarn-site.xml</code> and can be specified as follows for example for a lifetime of 1 year</p>
+<pre><code class="xml">&lt;property&gt;
+  &lt;name&gt;yarn.resourcemanager.delegation.token.max-lifetime&lt;/name&gt;
+  &lt;value&gt;31536000000&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<p>The NameNode delegation token max lifetime is specified in
+hdfs-site.xml and can be specified as follows for example for a lifetime of 1 year</p>
+<pre><code class="xml">&lt;property&gt;
+   &lt;name&gt;dfs.namenode.delegation.token.max-lifetime&lt;/name&gt;
+   &lt;value&gt;31536000000&lt;/value&gt;
+ &lt;/property&gt;
+</code></pre>
+
+<h3 id="cli-configuration">CLI Configuration</h3>
+<p>The Apex command line interface is used to launch
+applications along with performing various other operations and administrative tasks on the applications.  When Kerberos security is enabled in Hadoop, a Kerberos ticket granting ticket (TGT) or the Kerberos credentials of the user are needed by the CLI program <code>dtcli</code> to authenticate with Hadoop for any operation. Kerberos credentials are composed of a principal and either a <em>keytab</em> or a password. For security and operational reasons only keytabs are supported in Hadoop and by extension in Apex platform. When user credentials are specified, all operations including launching
+application are performed as that user.</p>
+<h4 id="using-kinit">Using kinit</h4>
+<p>A Kerberos ticket granting ticket (TGT) can be obtained by using the Kerberos command <code>kinit</code>. Detailed documentation for the command can be found online or in man pages. A sample usage of this command is</p>
+<pre><code>kinit -k -t path-to-keytab-file kerberos-principal
+</code></pre>
+<p>If this command is successful, the TGT is obtained, cached and available for other programs. The CLI program <code>dtcli</code> can then be started to launch applications and perform other operations.</p>
+<h4 id="using-kerberos-credentials">Using Kerberos credentials</h4>
+<p>The CLI program <code>dtcli</code> can also use the Kerberos credentials directly without requiring a TGT to be obtained separately. This can be useful in batch mode where <code>dtcli</code> is not launched manually and also in scenarios where running another program like <code>kinit</code> is not feasible.</p>
+<p>The credentials can be specified in the <code>dt-site.xml</code> configuration file. If only a single user is launching applications, the global <code>dt-site.xml</code> configuration file in the installation folder can be used. In a multi-user environment the users can use the <code>dt-site.xml</code> file in their
+home directory. The location of this file will be <code>$HOME/.dt/dt-site.xml</code>. If this file does not exist, the user can create a new one.</p>
+<p>The snippet below shows how the credentials can be specified in the configuration file as properties.</p>
+<pre><code class="xml">&lt;property&gt;
+        &lt;name&gt;dt.authentication.principal&lt;/name&gt;
+        &lt;value&gt;kerberos-principal-of-user&lt;/value&gt;
+&lt;/property&gt;
+&lt;property&gt;
+        &lt;name&gt;dt.authentication.keytab&lt;/name&gt;
+        &lt;value&gt;absolute-path-to-keytab-file&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<p>The property <code>dt.authentication.principal</code> specifies the Kerberos user principal and <code>dt.authentication.keytab</code> specifies the absolute path to the keytab file for the user.</p>
+<p>The subsequent sections talk about how security works in Apex. This information is not needed by users but is intended for the inquisitive technical audience who want to know how security works.</p>
+<h2 id="security-architecture">Security architecture</h2>
+<p>In this section we will see how security works for applications built on Apex. We will look at the different methodologies involved in running the applications and in each case we will look into the different components that are involved. We will go into the architecture of these components and look at the different security mechanisms that are in play.</p>
+<h3 id="application-launch">Application Launch</h3>
+<p>To launch applications in Apache Apex the command line client dtcli can be used. The application artifacts such as binaries and properties are supplied as an application package. The client, during the various steps involved to launch the application needs to communicate with both the Resource Manager and the Name Node. The Resource Manager communication involves the client asking for new resources to run the application master and start the application launch process. The steps along with sample Java code are described in Writing YARN Applications. The Name Node communication includes the application artifacts being copied to HDFS so that they are available across the cluster for launching the different application containers.</p>
+<p>In secure mode the communications with both Resource Manager and Name Node requires authentication and the mechanism is Kerberos. Below is an illustration showing this.</p>
+<p><img alt="" src="../images/security/image02.png" />        </p>
+<p>The client dtcli supports Kerberos authentication and will automatically enable it in a secure environment. To authenticate, some Kerberos configuration namely the Kerberos credentials, are needed by the client. There are two parameters, the Kerberos principal and keytab to use for the client. These can be specified in the dt-site.xml configuration file. The properties are shown below</p>
+<pre><code>    &lt;property&gt;
+            &lt;name&gt;dt.authentication.principal&lt;/name&gt;
+            &lt;value&gt;kerberos-principal-of-user&lt;/value&gt;
+    &lt;/property&gt;
+    &lt;property&gt;
+            &lt;name&gt;dt.authentication.keytab&lt;/name&gt;
+            &lt;value&gt;absolute-path-to-keytab-file&lt;/value&gt;
+    &lt;/property&gt;
+</code></pre>
+<p>Refer to document Operation and Installation Guide section Multi Tenancy and Security subsection CLI Configuration in the documentation for more information. The document can also be accessed here client configuration</p>
+<p>There is another important functionality that is performed by the client and that is to retrieve what are called delegation tokens from the Resource Manager and Name Node to seed the application master container that is to be launched. This is detailed in the next section. </p>
+<h3 id="runtime-security">Runtime Security</h3>
+<p>When the application is completely up and running, there are different components of the application running as separate processes possibly on different nodes in the cluster as it is a distributed application. These components interact with each other and the Hadoop services. In secure mode, all these interactions have to be authenticated before they can be successfully processed. The interactions are illustrated below in a diagram to give a complete overview. Each of them is explained in subsequent sections.</p>
+<p><img alt="" src="../images/security/image00.png" /></p>
+<h4 id="stram-and-hadoop">STRAM and Hadoop</h4>
+<p>Every Apache Apex application has a master process akin to any YARN application. In our case it is called STRAM (Streaming Application Master). It is a master process that runs in its own container and manages the different distributed components of the application. Among other tasks it requests Resource Manager for new resources as they are needed and gives back resources that are no longer needed. STRAM also needs to communicate with Name Node from time-to-time to access the persistent HDFS file system. </p>
+<p>In secure mode STRAM has to authenticate with both Resource Manager and Name Node before it can send any requests and this is achieved using Delegation Tokens. Since STRAM runs as a managed application master it runs in a Hadoop container. This container could have been allocated on any node based on what resources were available. Since there is no fixed node where STRAM runs it does not have Kerberos credentials and hence unlike the launch client dtcli it cannot authenticate with Hadoop services Resource Manager and Name Node using Kerberos. Instead, Delegation Tokens are used for authentication.</p>
+<h5 id="delegation-tokens">Delegation Tokens</h5>
+<p>Delegation tokens are tokens that are dynamically issued by the source and clients use them to authenticate with the source. The source stores the delegation tokens it has issued in a cache and checks the delegation token sent by a client against the cache. If a match is found, the authentication is successful else it fails. This is the second mode of authentication in secure Hadoop after Kerberos. More details can be found in the Hadoop security design document. In this case the delegation tokens are issued by Resource Manager and Name Node. STRAM uses these tokens to authenticate with them. But how does it get them in the first place? This is where the launch client dtcli comes in. </p>
+<p>The client dtcli, since it possesses Kerberos credentials as explained in the Application Launch section, is able to authenticate with Resource Manager and Name Node using Kerberos. It then requests for delegation tokens over the Kerberos authenticated connection. The servers return the delegation tokens in the response payload. The client in requesting the resource manager for the start of the application master container for STRAM seeds it with these tokens so that when STRAM starts it has these tokens. It can then use these tokens to authenticate with the Hadoop services.</p>
+<h4 id="streaming-container">Streaming Container</h4>
+<p>A streaming container is a process that runs a part of the application business logic. It is a container deployed on a node in the cluster. The part of business logic is implemented in what we call an operator. Multiple operators connected together make up the complete application and hence there are multiple streaming containers in an application. The streaming containers have different types of communications going on as illustrated in the diagram above. They are described below.</p>
+<h5 id="stram-delegation-token">STRAM Delegation Token</h5>
+<p>The streaming containers periodically communicate with the application master STRAM. In the communication they send what are called heartbeats with information such as statistics and receive commands from STRAM such as deployment or un-deployment of operators, changing properties of operators etc. In secure mode, this communication cannot just occur without any authentication. To facilitate this authentication special tokens called STRAM Delegation Tokens are used. These tokens are created and managed by STRAM. When a new streaming container is being started, since STRAM is the one negotiating resources from Resource Manager for the container and requesting to start the container, it seeds the container with the STRAM delegation token necessary to communicate with it. Thus, a streaming container has the STRAM delegation token to successfully authenticate and communicate with STRAM.</p>
+<h5 id="buffer-server-token">Buffer Server Token</h5>
+<p>As mentioned earlier an operator implements a piece of the business logic of the application and multiple operators together complete the application. In creating the application the operators are assembled together in a directed acyclic graph, a pipeline, with the output of some operators becoming the input for other operators. At runtime the streaming containers hosting the operators are connected to each other and send data to each other. In secure mode these connections should be authenticated too, more importantly than others, as they are involved in transferring application data.</p>
+<p>When operators are running there will be effective processing rate differences between them due to intrinsic reasons such as operator logic or external reasons such as different resource availability of CPU, memory, network bandwidth etc. as the operators are running in different containers. To maximize performance and utilization the data flow is handled asynchronously with respect to the regular operator function, and a buffer is used to temporarily store the data being produced by the operator. This buffered data is served by a buffer server over the network connection to the downstream streaming container containing the operator that is supposed to receive the data from this operator. This connection is secured by a token called the buffer server token. These tokens are also generated and seeded by STRAM when the streaming containers are deployed and started, and it uses different tokens for different buffer servers to provide better security.</p>
+<h5 id="namenode-delegation-token">NameNode Delegation Token</h5>
+<p>Like STRAM, streaming containers also need to communicate with NameNode to use HDFS persistence for reasons such as saving the state of the operators. In secure mode they also use NameNode delegation tokens for authentication. These tokens are also seeded by STRAM for the streaming containers.</p>
+<h2 id="conclusion">Conclusion</h2>
+<p>We looked at the different security requirements for distributed applications when they run in a secure Hadoop environment and at how Apex meets them.</p>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
+      
+        <a href="../compatibility/" class="btn btn-neutral float-right" title="Compatibility">Next <span class="icon icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../dtcli/" class="btn btn-neutral" title="dtCli"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+	  
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../dtcli/" style="color: #fcfcfc;">&laquo; Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../compatibility/" style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-apex-site/blob/c7ee6de7/docs/apex-3.3/sitemap.xml
----------------------------------------------------------------------
diff --git a/docs/apex-3.3/sitemap.xml b/docs/apex-3.3/sitemap.xml
index 262a94b..961aad4 100644
--- a/docs/apex-3.3/sitemap.xml
+++ b/docs/apex-3.3/sitemap.xml
@@ -4,7 +4,7 @@
     
     <url>
      <loc>/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
     
@@ -13,31 +13,31 @@
         
     <url>
      <loc>/apex_development_setup/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/application_development/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/application_packages/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/operator_development/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
     <url>
      <loc>/autometrics/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
@@ -47,7 +47,13 @@
         
     <url>
      <loc>/dtcli/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
+     <changefreq>daily</changefreq>
+    </url>
+        
+    <url>
+     <loc>/security/</loc>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
         
@@ -56,7 +62,7 @@
     
     <url>
      <loc>/compatibility/</loc>
-     <lastmod>2016-03-18</lastmod>
+     <lastmod>2016-03-21</lastmod>
      <changefreq>daily</changefreq>
     </url>
     


Mime
View raw message