apex-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t..@apache.org
Subject [03/15] apex-site git commit: from f5751880732ca3c0813ff11032cec27b6126a165
Date Sun, 27 Nov 2016 01:25:05 GMT
http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/fsInputOperator/index.html
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/fsInputOperator/index.html b/content/docs/malhar-3.6/operators/fsInputOperator/index.html
new file mode 100644
index 0000000..026ced2
--- /dev/null
+++ b/content/docs/malhar-3.6/operators/fsInputOperator/index.html
@@ -0,0 +1,754 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>File Input - Apache Apex Malhar Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../../favicon.ico">
+  
+
+  
+  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "File Input";
+    var mkdocs_page_input_path = "operators/fsInputOperator.md";
+    var mkdocs_page_url = "/operators/fsInputOperator/";
+  </script>
+  
+  <script src="../../js/jquery-2.1.1.min.js"></script>
+  <script src="../../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
+  <script src="../../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href="../.." class="icon icon-home"> Apache Apex Malhar Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../..">Apache Apex Malhar</a>
+        
+    </li>
+</li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operators</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../kafkaInputOperator/">Kafka Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jmsInputOperator/">JMS Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_splitter/">File Splitter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../block_reader/">Block Reader</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">File Input</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a href="#file-input-operator">File Input Operator</a></li>
+                
+                    <li><a class="toctree-l4" href="#operator-objective">Operator Objective</a></li>
+                
+                    <li><a class="toctree-l4" href="#overview">Overview</a></li>
+                
+                    <li><a class="toctree-l4" href="#use-cases">Use Cases</a></li>
+                
+                    <li><a class="toctree-l4" href="#how-to-use">How to Use?</a></li>
+                
+                    <li><a class="toctree-l4" href="#partitioning">Partitioning</a></li>
+                
+                    <li><a class="toctree-l4" href="#operator-information">Operator Information</a></li>
+                
+                    <li><a class="toctree-l4" href="#ports">Ports</a></li>
+                
+            
+                <li class="toctree-l3"><a href="#abstract-methods">Abstract Methods</a></li>
+                
+            
+                <li class="toctree-l3"><a href="#derived-classes">Derived Classes</a></li>
+                
+                    <li><a class="toctree-l4" href="#1-abstractftpinputoperator">1. AbstractFTPInputOperator</a></li>
+                
+                    <li><a class="toctree-l4" href="#2-ftpstringinputoperator">2. FTPStringInputOperator</a></li>
+                
+                    <li><a class="toctree-l4" href="#3-abstractparquetfilereader">3. AbstractParquetFileReader</a></li>
+                
+                    <li><a class="toctree-l4" href="#4-abstractthroughputfileinputoperator">4. AbstractThroughputFileInputOperator</a></li>
+                
+                    <li><a class="toctree-l4" href="#5-linebylinefileinputoperator">5. LineByLineFileInputOperator</a></li>
+                
+            
+                <li class="toctree-l3"><a href="#example-implementation-using-a-custom-character-encoding">Example Implementation Using a Custom Character Encoding</a></li>
+                
+            
+                <li class="toctree-l3"><a href="#common-implementation-scenarios">Common Implementation Scenarios</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvParserOperator/">Csv Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_output/">File Output</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../enricher/">Enricher</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../filter/">Filter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../deduper/">Deduper</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../windowedOperator/">Windowed Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">Json Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">Json Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transform Operator</a>
+        
+    </li>
+
+        
+    </ul>
+</li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../..">Apache Apex Malhar Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operators &raquo;</li>
+        
+      
+    
+    <li>File Input</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="file-input-operator">File Input Operator</h1>
+<h2 id="operator-objective">Operator Objective</h2>
+<p>This operator is designed to scan a directory for files, read and split file content into tuples
+such as lines or a block of bytes, and finally emit them on output ports defined in concrete
+subclasses for further processing by downstream operators.
+It can be used with any filesystem supported by Hadoop like HDFS, S3, ftp, NFS etc.</p>
+<h2 id="overview">Overview</h2>
+<p>The operator is <strong>idempotent</strong>, <strong>fault-tolerant</strong> and <strong>partitionable</strong>.</p>
+<p>Logic for directory scanning is encapsulated in the <code>DirectoryScanner</code> static inner class
+which provides functions such as matching file names against a regular expression, tracking files
+that have already been processed (so that they are not processed again), filtering files based
+on the hashcode of the file names in the presence of partitioning so that each file is
+processed by a unique partition. This class can be extended if necessary to provide
+additional capabilities such as scanning multiple directories.</p>
+<p>It tracks the current file offset as part of checkpoint state. If it fails and is restarted
+by the platform, it will seek to the saved offset to avoid duplicate processing. Exactly-once processing
+for fault tolerance is handled using the window data manager. For more details check the blog about <a href="https://www.datatorrent.com/blog/fault-tolerant-file-processing/">Fault-Tolerant File Processing</a>.
+It supports both static and dynamic partitioning.</p>
+<h2 id="use-cases">Use Cases</h2>
+<p>This operator is suitable for use in an environment where small to medium sized files are
+deposited in a specific directory on a regular basis. For very large files a better alternative
+is the <code>FileSplitter</code> and <code>BlockReader</code> combination since they allow such files to be processed
+by multiple partitions to achieve higher throughput. Additionally, files which are continually
+modified by other processes are not suitable for processing with this operator since they may
+yield unpredictable results.</p>
+<h2 id="how-to-use">How to Use?</h2>
+<p>The tuple type in the abstract class is a generic parameter.
+Concrete subclasses need to choose an appropriate class (such as <code>String</code> or <code>byte[]</code>) for the
+generic parameter and also implement a couple of abstract methods: <code>readEntity()</code> to read
+the next tuple from the currently open file and <code>emit()</code> to process the next tuple.</p>
+<p>In principle, no ports need be defined in the rare case that the operator simply writes
+tuples to some external sink or merely maintains aggregated statistics. But in most common
+scenarios, the tuples need to be sent to one or more downstream operators for additional
+processing such as parsing, enrichment or aggregation; in such cases, appropriate
+output ports are defined and the <code>emit()</code> implementation dispatches tuples to the
+desired output ports.</p>
+<p>A simple concrete implementation is provided in Malhar: <code>LineByLineFileInputOperator</code>.
+It uses <code>String</code> for the generic parameter, defines a single output port and processes each
+line of the input file as a tuple. It is discussed further below.</p>
+<h2 id="partitioning">Partitioning</h2>
+<h4 id="static-partitioning">Static Partitioning</h4>
+<p>Configure parameter <code>partitionCount</code> to define the desired number of initial partitions
+(4 in this example).</p>
+<pre><code class="xml">&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.partitionCount&lt;/name&gt;
+  &lt;value&gt;4&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<p>where <em>{OperatorName}</em> is the name of the input operator.</p>
+<h4 id="dynamic-partitioning">Dynamic Partitioning</h4>
+<p>Dynamic partitioning -- changing the number of partitions of one or more operators
+in a running application -- can be achieved in multiple ways:</p>
+<ul><li>Use the command line tool <code>apex</code> or the UI console to change the value of the
+  <code>partitionCount</code> property of the running operator. This change is detected in
+  <code>processStats()</code> (which is invoked periodically by the platform) where, if the
+  current partition count (<code>currentPartitions</code>) and the desired partition count
+  (<code>partitionCount</code>) differ, the <code>repartitionRequired</code> flag in the response is set.
+  This causes the platform to invoke <code>definePartitions()</code> to create a new set of
+  partitions with the desired count.</li>
+<li>Override <code>processStats()</code> and within it, based on the statistics in the
+  incoming parameter or any other factors, define a new desired value of
+  <code>partitionCount</code> and finally, if this value differs from the current partition
+  count, set the <code>repartitionRequired</code> flag in the response.</li></ul>
+<p>The details of actually creating the new set of partitions can be customized by overriding
+the <code>definePartitions()</code> method. There are a couple of things to keep in mind when doing this.
+The first is that repartitioning needs some care when the operator has state (as is the
+case here): Existing state from current operator partitions needs to be redistributed to the
+new partitions in a logically consistent way. The second is that some or all of the
+current set of partitions, which is an input parameter to <code>definePartitions()</code>, can be
+copied over to the new set; such partitions will continue running and will not be
+restarted. Any existing partitions that are not present in the new set will be shut down.
+The current re-partitioning logic does not preserve any existing partitions, so upon
+a repartition event, all existing partitions are shut down and the new ones started.</p>
+<h2 id="operator-information">Operator Information</h2>
+<ol>
+<li>Operator location: <strong><em>malhar-library</em></strong></li>
+<li>Available since: <strong><em>1.0.2</em></strong></li>
+<li>Operator state: <strong><em>Stable</em></strong></li>
+<li>Java Packages:<ul>
+<li>Operator: <strong><em><a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/io/fs/AbstractFileInputOperator.html">com.datatorrent.lib.io.fs.AbstractFileInputOperator</a></em></strong></li>
+</ul>
+</li>
+</ol>
+<h3 id="abstractfileinputoperator">AbstractFileInputOperator</h3>
+<p>This is the abstract implementation that, as noted above, scans a single directory.
+It can be extended to modify functionality or add new capabilities. For example, the
+directory scanner can be overridden to monitor multiple directories. <a href="https://github.com/DataTorrent/examples/tree/master/tutorials/fileIO-multiDir">This</a> example demonstrates how to do that.
+As noted in the overview above, this class has no ports, so concrete subclasses will need to
+provide them if necessary.</p>
+<p><img alt="AbstractFileInputOperator.png" src="../images/fsInput/operatorsClassDiagram.png" /></p>
+<h3 id="properties-of-abstractfileinputoperator"><a name="AbstractFileInputOperatorProps"></a>Properties of AbstractFileInputOperator</h3>
+<p>Several properties are available to configure the behavior of this operator and they are
+summarized in the table below. Of these, only <code>directory</code> is required: it specifies
+the path of the monitored directory. It can be set like this:</p>
+<pre><code class="xml">&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.directory&lt;/name&gt;
+  &lt;value&gt;/tmp/fileInput&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<p>If new files appear with high frequency in this directory
+and they need to be processed as soon as they appear, reduce the value of <code>scanIntervalMillis</code>;
+if they appear rarely or if some delay in processing a new file is acceptable, increase it.
+Obviously, smaller values will result in greater IO activity for the corresponding filesystem.</p>
+<p>The platform invokes the <code>emitTuples()</code> callback multiple times in each streaming window; within
+a single such call, if a large number of tuples are emitted, there is some risk that they
+may overwhelm the downstream operators especially if they are performing some compute intensive
+operation. For such cases, output can be throttled by reducing the value of the
+<code>emitBatchSize</code> property. Conversely, if the downstream operators can handle the load, increase
+the value to enhance throughput.</p>
+<p>The <code>partitionCount</code> parameter has already been discussed above.</p>
+<p>Occasionally, some files get into a bad state and cause errors when an attempt is made to
+read from them. The causes vary depending on the filesystem type ranging from corrupted
+filesystems to network issues. In such cases, the operator will retry reading from such
+files a limited number of times before blacklisting those files. This retry count is
+defined by the <code>maxRetryCount</code> property.</p>
+<p>Finally, the specific scanner class used to monitor the input directories can be configured
+by setting the <code>scanner</code> property.</p>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>directory</em></td>
+<td>absolute path of directory to be scanned</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+<tr>
+<td><em>scanIntervalMillis</em></td>
+<td>Interval in milliseconds after which directory should be scanned for new files</td>
+<td>int</td>
+<td>No</td>
+<td>5000</td>
+</tr>
+<tr>
+<td><em>emitBatchSize</em></td>
+<td>Maximum number of tuples to emit in a single call to the <code>emitTuples()</code> callback (see explanation above).</td>
+<td>int</td>
+<td>No</td>
+<td>1000</td>
+</tr>
+<tr>
+<td><em>partitionCount</em></td>
+<td>Desired number of partitions</td>
+<td>int</td>
+<td>No</td>
+<td>1</td>
+</tr>
+<tr>
+<td><em>maxRetryCount</em></td>
+<td>Maximum number of times the operator will attempt to process a file</td>
+<td>int</td>
+<td>No</td>
+<td>5</td>
+</tr>
+<tr>
+<td><em>scanner</em></td>
+<td>Scanner to scan new files in directory</td>
+<td><a href="#DirectoryScanner">DirectoryScanner</a></td>
+<td>No</td>
+<td>DirectoryScanner</td>
+</tr>
+</tbody>
+</table>
+<h4 id="properties-of-directoryscanner"><a name="DirectoryScanner"></a>Properties of DirectoryScanner</h4>
+<p>The directory scanner has one optional property: a regular expression to filter files
+of interest. If absent, all files in the source directory are processed. It can be
+set like this:</p>
+<pre><code class="xml">&lt;property&gt;
+  &lt;name&gt;dt.operator.{OperatorName}.prop.scanner.filePatternRegexp&lt;/name&gt;
+  &lt;value&gt;.*\.log&lt;/value&gt;
+&lt;/property&gt;
+</code></pre>
+
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>filePatternRegexp</em></td>
+<td>regex to select files from input directory</td>
+<td>String</td>
+<td>No</td>
+<td>N/A</td>
+</tr>
+</tbody>
+</table>
+<h3 id="ports">Ports</h3>
+<p>This operator has no ports.</p>
+<h2 id="abstract-methods">Abstract Methods</h2>
+<p>As described above, concrete subclasses need to provide implementations for these two
+methods:</p>
+<pre><code class="java">void emit(T tuple);
+T readEntity();
+</code></pre>
+
+<p>Examples of implementations are in the <code>LineByLineFileInputOperator</code> operator and also in
+the example at the end of this guide.</p>
+<h2 id="derived-classes">Derived Classes</h2>
+<h3 id="1-abstractftpinputoperator">1. AbstractFTPInputOperator</h3>
+<p>The class is used to read files from FTP file system. As for the above abstract class, concrete
+subclasses need to implement the
+<a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/io/fs/AbstractFileInputOperator.html#readEntity">readEntity</a> and
+<a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/io/fs/AbstractFileInputOperator.html#emit">emit</a> methods.</p>
+<h4 id="properties"><a name="AbstractFTPInputOperatorProps"></a>Properties</h4>
+<p>This operator defines the following additional properties beyond those defined in the
+<a href="#AbstractFileInputOperatorProps">parent class</a>.</p>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>host</em></td>
+<td>Hostname of ftp server.</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+<tr>
+<td><em>port</em></td>
+<td>Port of ftp server.</td>
+<td>int</td>
+<td>No</td>
+<td>21 (default ftp port)</td>
+</tr>
+<tr>
+<td><em>userName</em></td>
+<td>Username which is used for login to the server.</td>
+<td>String</td>
+<td>No</td>
+<td>anonymous</td>
+</tr>
+<tr>
+<td><em>password</em></td>
+<td>Password which is used for login to the server.</td>
+<td>String</td>
+<td>No</td>
+<td>guest</td>
+</tr>
+</tbody>
+</table>
+<h4 id="ports_1">Ports</h4>
+<p>This operator has no ports.</p>
+<h3 id="2-ftpstringinputoperator">2. FTPStringInputOperator</h3>
+<p>This class extends AbstractFTPInputOperator and implements the abstract methods to read files available on an FTP file system line by line.</p>
+<h4 id="properties_1"><a name="FTPStringInputOperatorProps"></a>Properties</h4>
+<p>This operator defines no additional properties beyond those defined in the
+<a href="#AbstractFTPInputOperatorProps">parent class</a>.</p>
+<h4 id="ports_2">Ports</h4>
+<table>
+<thead>
+<tr>
+<th><strong>Port</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>output</em></td>
+<td>Tuples that are read from file are emitted on this port</td>
+<td>String</td>
+<td>Yes</td>
+</tr>
+</tbody>
+</table>
+<h3 id="3-abstractparquetfilereader">3. AbstractParquetFileReader</h3>
+<p>Reads Parquet files from the input directory using GroupReadSupport. Derived classes need to implement the <a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/contrib/parquet/AbstractParquetFileReader.html#convertGroup(Group)">convertGroup(Group)</a> method to convert a Group to another type. They must also implement the <a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/io/fs/AbstractFileInputOperator.html#readEntity()">readEntity()</a> and <a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/io/fs/AbstractFileInputOperator.html#emit(T)">emit(T)</a> methods.</p>
+<h4 id="properties-of-abstractparquetfilereader"><a name="AbstractParquetFileReaderProps"></a>Properties of AbstractParquetFileReader</h4>
+<p>This operator defines the following additional properties beyond those defined in the
+<a href="#AbstractFileInputOperatorProps">parent class</a>.</p>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>parquetSchema</em></td>
+<td>Parquet Schema to parse record.</td>
+<td>String</td>
+<td>Yes</td>
+<td>N/A</td>
+</tr>
+</tbody>
+</table>
+<h4 id="ports_3">Ports</h4>
+<p>This operator has no ports.</p>
+<h3 id="4-abstractthroughputfileinputoperator">4. AbstractThroughputFileInputOperator</h3>
+<p>This operator extends <code>AbstractFileInputOperator</code> by providing the capability to partition
+dynamically based on the file backlog. The user can set the preferred number of pending files per operator as well as the maximum number of operators and define a repartition interval. If a physical operator runs out of files to process and an amount of time greater than or equal to the repartition interval has passed, then a new number of operators are created to accommodate the remaining pending files. Derived classes need to implement the <a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/io/fs/AbstractFileInputOperator.html#readEntity()">readEntity()</a> and <a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/lib/io/fs/AbstractFileInputOperator.html#emit(T)">emit(T)</a> methods.</p>
+<h4 id="properties-of-abstractthroughputfileinputoperator"><a name="AbstractThroughputFileInputOperatorProps"></a>Properties of AbstractThroughputFileInputOperator</h4>
+<p>This operator defines the following additional properties beyond those defined in the
+<a href="#AbstractFileInputOperatorProps">parent class</a>.</p>
+<table>
+<thead>
+<tr>
+<th><strong>Property</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+<th><strong>Default Value</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>repartitionInterval</em></td>
+<td>The minimum amount of time that must pass in milliseconds before the operator can be repartitioned.</td>
+<td>long</td>
+<td>No</td>
+<td>300000 (5 minutes)</td>
+</tr>
+<tr>
+<td><em>preferredMaxPendingFilesPerOperator</em></td>
+<td>the preferred number of pending files per operator.</td>
+<td>int</td>
+<td>No</td>
+<td>10</td>
+</tr>
+<tr>
+<td><em>partitionCount</em></td>
+<td>the maximum number of partitions for the operator.</td>
+<td>int</td>
+<td>No</td>
+<td>1</td>
+</tr>
+</tbody>
+</table>
+<h4 id="ports_4">Ports</h4>
+<p>This operator has no ports.</p>
+<h3 id="5-linebylinefileinputoperator">5. LineByLineFileInputOperator</h3>
+<p>As mentioned in the overview above, this operator defines a single output port; it reads files
+as lines and emits them as Java Strings on the output port. The output port <em>must</em> be connected.
+Lines are extracted using the Java <code>BufferedReader</code> class and the default character encoding.
+An example illustrating the use of a custom encoding (such as UTF-8) is provided below.</p>
+<h4 id="properties_2">Properties</h4>
+<p>This operator defines no additional properties beyond those defined in the
+<a href="#AbstractFileInputOperatorProps">parent class</a>.</p>
+<h4 id="ports_5">Ports</h4>
+<table>
+<thead>
+<tr>
+<th><strong>Port</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>output</em></td>
+<td>Tuples that are read from file are emitted on this port</td>
+<td>String</td>
+<td>Yes</td>
+</tr>
+</tbody>
+</table>
+<h2 id="example-implementation-using-a-custom-character-encoding">Example Implementation Using a Custom Character Encoding</h2>
+<p>This example demonstrates how to extend the <code>AbstractFileInputOperator</code> to read
+UTF-8 encoded data.</p>
+<pre><code>public class EncodedDataReader extends AbstractFileInputOperator&lt;String&gt;
+{
+  public final transient DefaultOutputPort&lt;String&gt; output = new DefaultOutputPort&lt;&gt;();
+  protected transient BufferedReader br;
+
+  protected InputStream openFile(Path path) throws IOException
+  {
+    InputStream is = super.openFile(path);
+    br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
+    return is;
+  }
+
+  @Override
+  protected void closeFile(InputStream is) throws IOException
+  {
+    super.closeFile(is);
+    br.close();
+    br = null;
+  }
+
+  @Override
+  protected String readEntity() throws IOException
+  {
+    return br.readLine();
+  }
+
+  @Override
+  protected void emit(String tuple)
+  {
+    output.emit(tuple);
+  }
+}
+
+</code></pre>
+
+<h2 id="common-implementation-scenarios">Common Implementation Scenarios</h2>
+<p>Sometimes, downstream operators need to know which file each tuple came from; there are a
+number of ways of achieving this, each with its own tradeoffs. Some alternatives:</p>
+<ul>
+<li>If the generic tuple type is a String, each tuple can be prefixed with the file name
+  with a suitable separator, for example: <code>foo.txt: first line</code>. This works but
+  has obvious additional costs in both processing (to parse out the two pieces of each
+  tuple) and network bandwidth utilization.</li>
+<li>Define a custom tuple class with two fields: one for the file name and one for tuple data.
+  The costs are similar to the previous approach though the code is simpler since
+  parsing is handled behind the scenes by the serialization process.</li>
+<li>Define the tuple type to be <code>Object</code> and emit either a custom <code>Tuple</code> object for actual
+  tuple data or <strong>BOF</strong>/<strong>EOF</strong> objects with the name of the file when a new file begins
+  or the current file ends. Here, the additional bandwidth consumed is
+  minimal (just 2 additional tuples at file boundaries) but the type of each tuple needs
+  to be checked using <code>instanceof</code> in the downstream operators which has some runtime cost.</li>
+<li>Similar to the previous approach but define an additional control port dedicated to
+  the BOF/EOF control tuples. This approach eliminates the runtime cost of using <code>instanceof</code>
+  but some care is needed because (a) the order of tuples arriving at multiple input ports
+  in downstream operators cannot be guaranteed -- for example, the BOF/EOF control tuples
+  may arrive before some of the actual data tuples; and (b) since the operator may read
+  more than one file in a single streaming window, the downstream operator may not be
+  able to tell which tuples belong to which file. One way of dealing with this is to
+  stop emitting data tuples until the next <code>endWindow()</code> callback when an EOF is detected
+  for the current file; that way, if the downstream operator receives an EOF control tuple,
+  it has the guarantee that all the data tuples received in the same window belong to the
+  current file.</li>
+</ul>
+<p>Of course, other strategies are possible depending on the needs of the particular situation.</p>
+<p>When used in a long-running application where a very large number of files are processed
+over time, the internal state (consisting of properties like <code>processedFiles</code>) may grow
+correspondingly and this may have some performance impact since each checkpoint saves the
+entire operator state. In such situations, it is useful to explore options such as moving
+processed files to another directory and trimming operator state variables suitably.</p>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
+      
+        <a href="../csvParserOperator/" class="btn btn-neutral float-right" title="Csv Parser">Next <span class="icon icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../block_reader/" class="btn btn-neutral" title="Block Reader"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+	  
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../block_reader/" style="color: #fcfcfc;">&laquo; Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../csvParserOperator/" style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/blockreader/classdiagram.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/blockreader/classdiagram.png b/content/docs/malhar-3.6/operators/images/blockreader/classdiagram.png
new file mode 100644
index 0000000..8fbd6fc
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/blockreader/classdiagram.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/blockreader/flowdiagram.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/blockreader/flowdiagram.png b/content/docs/malhar-3.6/operators/images/blockreader/flowdiagram.png
new file mode 100644
index 0000000..1b2897d
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/blockreader/flowdiagram.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/blockreader/fsreaderexample.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/blockreader/fsreaderexample.png b/content/docs/malhar-3.6/operators/images/blockreader/fsreaderexample.png
new file mode 100644
index 0000000..571b60a
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/blockreader/fsreaderexample.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/blockreader/totalBacklogProcessing.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/blockreader/totalBacklogProcessing.png b/content/docs/malhar-3.6/operators/images/blockreader/totalBacklogProcessing.png
new file mode 100644
index 0000000..2ed481f
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/blockreader/totalBacklogProcessing.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/csvParser/CSVParser.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/csvParser/CSVParser.png b/content/docs/malhar-3.6/operators/images/csvParser/CSVParser.png
new file mode 100644
index 0000000..523ba0b
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/csvParser/CSVParser.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/deduper/image00.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/deduper/image00.png b/content/docs/malhar-3.6/operators/images/deduper/image00.png
new file mode 100644
index 0000000..ec3e292
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/deduper/image00.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/deduper/image01.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/deduper/image01.png b/content/docs/malhar-3.6/operators/images/deduper/image01.png
new file mode 100644
index 0000000..b9e35a9
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/deduper/image01.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/deduper/image02.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/deduper/image02.png b/content/docs/malhar-3.6/operators/images/deduper/image02.png
new file mode 100644
index 0000000..689bdfe
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/deduper/image02.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/deduper/image03.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/deduper/image03.png b/content/docs/malhar-3.6/operators/images/deduper/image03.png
new file mode 100644
index 0000000..087a0b1
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/deduper/image03.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/deduper/image04.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/deduper/image04.png b/content/docs/malhar-3.6/operators/images/deduper/image04.png
new file mode 100644
index 0000000..4d3bd32
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/deduper/image04.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/fileoutput/FileRotation.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/fileoutput/FileRotation.png b/content/docs/malhar-3.6/operators/images/fileoutput/FileRotation.png
new file mode 100644
index 0000000..624c96e
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/fileoutput/FileRotation.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/fileoutput/diagram1.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/fileoutput/diagram1.png b/content/docs/malhar-3.6/operators/images/fileoutput/diagram1.png
new file mode 100644
index 0000000..0a260de
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/fileoutput/diagram1.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/filesplitter/baseexample.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/filesplitter/baseexample.png b/content/docs/malhar-3.6/operators/images/filesplitter/baseexample.png
new file mode 100644
index 0000000..6af2b44
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/filesplitter/baseexample.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/filesplitter/classdiagram.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/filesplitter/classdiagram.png b/content/docs/malhar-3.6/operators/images/filesplitter/classdiagram.png
new file mode 100644
index 0000000..6490368
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/filesplitter/classdiagram.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/filesplitter/inputexample.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/filesplitter/inputexample.png b/content/docs/malhar-3.6/operators/images/filesplitter/inputexample.png
new file mode 100644
index 0000000..65e199f
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/filesplitter/inputexample.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/filesplitter/sequence.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/filesplitter/sequence.png b/content/docs/malhar-3.6/operators/images/filesplitter/sequence.png
new file mode 100644
index 0000000..85cf702
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/filesplitter/sequence.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/fsInput/operatorsClassDiagram.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/fsInput/operatorsClassDiagram.png b/content/docs/malhar-3.6/operators/images/fsInput/operatorsClassDiagram.png
new file mode 100644
index 0000000..31c7a0d
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/fsInput/operatorsClassDiagram.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/jdbcinput/operatorsClassDiagram.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/jdbcinput/operatorsClassDiagram.png b/content/docs/malhar-3.6/operators/images/jdbcinput/operatorsClassDiagram.png
new file mode 100644
index 0000000..4b0432d
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/jdbcinput/operatorsClassDiagram.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/jsonFormatter/JsonFormatter.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/jsonFormatter/JsonFormatter.png b/content/docs/malhar-3.6/operators/images/jsonFormatter/JsonFormatter.png
new file mode 100644
index 0000000..f457c68
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/jsonFormatter/JsonFormatter.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/jsonParser/JsonParser.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/jsonParser/JsonParser.png b/content/docs/malhar-3.6/operators/images/jsonParser/JsonParser.png
new file mode 100644
index 0000000..7235efc
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/jsonParser/JsonParser.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/kafkainput/image00.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/kafkainput/image00.png b/content/docs/malhar-3.6/operators/images/kafkainput/image00.png
new file mode 100644
index 0000000..0fa00e8
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/kafkainput/image00.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/windowedOperator/allowed-lateness.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/windowedOperator/allowed-lateness.png b/content/docs/malhar-3.6/operators/images/windowedOperator/allowed-lateness.png
new file mode 100644
index 0000000..34153ca
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/windowedOperator/allowed-lateness.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-1.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-1.png b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-1.png
new file mode 100644
index 0000000..78712e8
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-1.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-2.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-2.png b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-2.png
new file mode 100644
index 0000000..19bfab0
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-2.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-3.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-3.png b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-3.png
new file mode 100644
index 0000000..e7810eb
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-3.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-4.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-4.png b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-4.png
new file mode 100644
index 0000000..43838c8
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/windowedOperator/session-windows-4.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/windowedOperator/sliding-time-windows.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/windowedOperator/sliding-time-windows.png b/content/docs/malhar-3.6/operators/images/windowedOperator/sliding-time-windows.png
new file mode 100644
index 0000000..dc55e1a
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/windowedOperator/sliding-time-windows.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/images/windowedOperator/time-windows.png
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/images/windowedOperator/time-windows.png b/content/docs/malhar-3.6/operators/images/windowedOperator/time-windows.png
new file mode 100644
index 0000000..cba471c
Binary files /dev/null and b/content/docs/malhar-3.6/operators/images/windowedOperator/time-windows.png differ

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/jmsInputOperator/index.html
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/jmsInputOperator/index.html b/content/docs/malhar-3.6/operators/jmsInputOperator/index.html
new file mode 100644
index 0000000..ea8db2f
--- /dev/null
+++ b/content/docs/malhar-3.6/operators/jmsInputOperator/index.html
@@ -0,0 +1,461 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>JMS Input - Apache Apex Malhar Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../../favicon.ico">
+  
+
+  
+  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "JMS Input";
+    var mkdocs_page_input_path = "operators/jmsInputOperator.md";
+    var mkdocs_page_url = "/operators/jmsInputOperator/";
+  </script>
+  
+  <script src="../../js/jquery-2.1.1.min.js"></script>
+  <script src="../../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
+  <script src="../../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href="../.." class="icon icon-home"> Apache Apex Malhar Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../..">Apache Apex Malhar</a>
+        
+    </li>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operators</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../kafkaInputOperator/">Kafka Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">JMS Input</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a href="#jms-input-operator">JMS INPUT OPERATOR</a></li>
+                
+                    <li><a class="toctree-l4" href="#introduction-about-the-jms-input-operator">Introduction: About the JMS Input Operator</a></li>
+                
+                    <li><a class="toctree-l4" href="#why-is-it-needed">Why is it needed?</a></li>
+                
+                    <li><a class="toctree-l4" href="#jmsbase">JMSBase</a></li>
+                
+                    <li><a class="toctree-l4" href="#abstractjmsinputoperator">AbstractJMSInputOperator</a></li>
+                
+                    <li><a class="toctree-l4" href="#concrete-classes">Concrete Classes</a></li>
+                
+                    <li><a class="toctree-l4" href="#application-examples">Application Examples</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_splitter/">File Splitter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../block_reader/">Block Reader</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../fsInputOperator/">File Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvParserOperator/">Csv Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_output/">File Output</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../enricher/">Enricher</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../filter/">Filter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../deduper/">Deduper</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../windowedOperator/">Windowed Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">Json Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonFormatter/">Json Formatter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transform Operator</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../..">Apache Apex Malhar Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operators &raquo;</li>
+        
+      
+    
+    <li>JMS Input</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="jms-input-operator">JMS INPUT OPERATOR</h1>
+<h3 id="introduction-about-the-jms-input-operator">Introduction: About the JMS Input Operator</h3>
+<p>The JMS input operator consumes data from a messaging system using the JMS client API. JMS not being a communication protocol, the operator needs an underlying JMS client API library to talk to a messaging system. Currently the operator has been tested with the Amazon SQS and Apache ActiveMQ System brokers via their respective JMS client API libraries.</p>
+<h3 id="why-is-it-needed">Why is it needed?</h3>
+<p>You will need the operator to read data from a messaging system (e.g. Apache ActiveMQ) via the JMS client API. The operator supports both the publish-subscribe (topics) and point-to-point (queues) modes. The operator currently does not support partitioning and dynamic scalability.</p>
+<h3 id="jmsbase">JMSBase</h3>
+<p>This class encapsulates various JMS properties and behaviors and maintains connections with the JMS broker. This is the base class for JMS input and output adaptor operators. Operators should not subclass JMSBase directly; instead, they should extend one of the JMS input or output operators.</p>
+<h3 id="abstractjmsinputoperator">AbstractJMSInputOperator</h3>
+<p>This abstract implementation serves as the base class for consuming generic messages from an external messaging system. Concrete subclasses implement conversion and emit methods to emit tuples for a concrete type. JMSStringInputOperator is one such subclass in the library used for String messages. JMSObjectInputOperator is another one used for multiple message types where the user has the ability to get String, byte array, Map or POJO messages on the respective output ports.</p>
+<h4 id="configuration-parameters">Configuration Parameters</h4>
+<p>Common configuration parameters are described here.
+<table>
+<col width="25%" />
+<col width="75%" />
+<tbody>
+<tr class="odd">
+<td align="left"><p>Parameter</p></td>
+<td align="left"><p>Description</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>windowDataManager</p></td>
+<td align="left"><p>This is an instance of <code>WindowDataManager</code> that implements idempotency. Idempotency ensures that an operator will process the same set of messages in a window before and after a failure. For example, say the operator completed window 10 and failed before or during window 11. If the operator gets restored at window 10, it will replay the messages of window 10 which were saved from the previous run before the failure. Although important, idempotency comes at a price because an operator needs to persist some state at the end of each window. Default Value = <code>org.apache.apex.malhar.lib.wal.FSWindowDataManager</code></p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>connectionFactoryBuilder</p></td>
+<td align="left"><p>The operator uses the builder pattern that requires the user to specify an instance of <code>com.datatorrent.lib.io.jms.JMSBase.ConnectionFactoryBuilder</code>. This builder creates the connection factory that encapsulates the underlying JMS client API library (e.g. ActiveMQ or Amazon SQS). By default the operator uses <code>com.datatorrent.lib.io.jms.JMSBase.DefaultConnectionFactoryBuilder</code> which is used for ActiveMQ. One of the examples below describes the Amazon SQS use-case.</p></td>
+</tr>
+</tbody>
+</table></p>
+<h4 id="abstract-methods">Abstract Methods</h4>
+<p>The following abstract methods need to be implemented by concrete subclasses.</p>
+<p>T convert(Message message): This method converts a JMS Message object to type T.</p>
+<p>void emit(T payload): This method emits a tuple given the payload extracted from a JMS message.</p>
+<h3 id="concrete-classes">Concrete Classes</h3>
+<ol>
+<li>
+<p>JMSStringInputOperator:
+This class extends AbstractJMSInputOperator to deliver String payloads in the tuple.</p>
+</li>
+<li>
+<p>JMSObjectInputOperator:
+This class extends AbstractJMSInputOperator to deliver String, byte array, Map or POJO payloads in the tuple.</p>
+</li>
+</ol>
+<h3 id="application-examples">Application Examples</h3>
+<h4 id="activemq-example">ActiveMQ Example</h4>
+<p>The source code for the tutorial can be found here:</p>
+<p><a href="https://github.com/DataTorrent/examples/tree/master/tutorials/jmsActiveMQ">https://github.com/DataTorrent/examples/tree/master/tutorials/jmsActiveMQ</a></p>
+<p>The following code snippet from the example illustrates how the DAG is created:</p>
+<pre><code class="java">  @Override
+  public void populateDAG(DAG dag, Configuration conf)
+  {
+    JMSStringInputOperator amqInput = dag.addOperator(&quot;amqIn&quot;, 
+        new JMSStringInputOperator());
+
+    LineOutputOperator out = dag.addOperator(&quot;fileOut&quot;, new LineOutputOperator());
+
+    dag.addStream(&quot;data&quot;, amqInput.output, out.input);
+  }
+</code></pre>
+
+<p>The DAG consists of only 2 operators: the <code>JMSStringInputOperator</code> which is the input operator that feeds received ActiveMQ messages into the output operator <code>LineOutputOperator</code> which outputs these messages into a file or files.</p>
+<p>The default connectionFactoryBuilder supports ActiveMQ so there is no need to set this value. However the following ActiveMQ related values need to be set either from properties files or using the appropriate setter methods in the code:</p>
+<table>
+<col width="25%" />
+<col width="75%" />
+<tbody>
+<tr class="odd">
+<td align="left"><p>Value</p></td>
+<td align="left"><p>Description</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>connectionFactoryProperties</p></td>
+<td align="left"><p>This is a Map of key and value strings and can be set directly from configuration as in the properties.xml example below. The table below describes the most important properties.</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>topic</p></td>
+<td align="left"><p>This boolean value is set to true for the publish-subscribe case and false for the PTP (point-to-point) case.</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>subject</p></td>
+<td align="left"><p>This is the queue name for PTP (point-to-point) use-case and topic name for the publish-subscribe use case.</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>durable</p></td>
+<td align="left"><p>This boolean value is set to true for durable subscriptions, false otherwise. Durable subscriptions save messages to persistent storage until consumed. Used only when the clientId (see below) is set.</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>clientId</p></td>
+<td align="left"><p>The client-ID for this ActiveMQ consumer in the durable subscription mode as described above.</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>transacted</p></td>
+<td align="left"><p>This boolean value is set to true for transacted JMS sessions as described in 
+<a href="https://docs.oracle.com/javaee/7/api/javax/jms/Session.html">Session</a>.</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>ackMode</p></td>
+<td align="left"><p>This string value sets the acknowledgement mode as described in 
+<a href="https://docs.oracle.com/javaee/7/api/javax/jms/Session.html#field.summary">Session fields</a>.</p></td>
+</tr>
+</tbody>
+</table>
+
+<p>The following table describes the string properties to be set in the map that is passed in the connectionFactoryProperties value described above.</p>
+<table>
+<col width="25%" />
+<col width="75%" />
+<tbody>
+<tr class="odd">
+<td align="left"><p>Property Name</p></td>
+<td align="left"><p>Description</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>brokerURL</p></td>
+<td align="left"><p>The <a href="http://activemq.apache.org/configuring-transports.html">connection URL</a> 
+used to connect to the ActiveMQ broker</p></td></tr>
+<tr class="odd">
+<td align="left"><p>userName</p></td>
+<td align="left"><p>The JMS userName used by connections created by this factory (optional when anonymous access is used)</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>password</p></td>
+<td align="left"><p>The JMS password used for connections created from this factory (optional when anonymous access is used)</p></td>
+</tr>
+</tbody>
+</table>
+
+<p>These properties can be set from the properties.xml file as shown below 
+(from the example <a href="https://github.com/DataTorrent/examples/tree/master/tutorials/jmsActiveMQ">https://github.com/DataTorrent/examples/tree/master/tutorials/jmsActiveMQ</a> ).</p>
+<pre><code class="xml">&lt;configuration&gt;
+  &lt;property&gt;
+    &lt;name&gt;dt.operator.amqIn.prop.connectionFactoryProperties.brokerURL&lt;/name&gt;
+    &lt;value&gt;vm://localhost&lt;/value&gt;
+  &lt;/property&gt;
+  &lt;property&gt;
+    &lt;name&gt;dt.operator.amqIn.prop.subject&lt;/name&gt;
+    &lt;value&gt;jms4Amq&lt;/value&gt;
+  &lt;/property&gt;
+&lt;/configuration&gt;                                                                                                        
+</code></pre>
+
+<h4 id="sqs-example">SQS Example</h4>
+<p>The source code for the tutorial can be found here:</p>
+<p><a href="https://github.com/DataTorrent/examples/tree/master/tutorials/jmsSqs">https://github.com/DataTorrent/examples/tree/master/tutorials/jmsSqs</a></p>
+<p>The following code snippet from the example illustrates how the DAG is created:</p>
+<pre><code class="java"> @Override
+ public void populateDAG(DAG dag, Configuration conf)
+ {
+
+   JMSStringInputOperator sqsInput = dag.addOperator(&quot;sqsIn&quot;, 
+       new JMSStringInputOperator());
+
+   MyConnectionFactoryBuilder factoryBuilder = new MyConnectionFactoryBuilder();
+
+   factoryBuilder.sqsDevCredsFilename = conf.get(SQSDEV_CREDS_FILENAME_PROPERTY);
+
+   sqsInput.setConnectionFactoryBuilder(factoryBuilder);
+
+   LineOutputOperator out = dag.addOperator(&quot;fileOut&quot;, new LineOutputOperator());
+
+   dag.addStream(&quot;data&quot;, sqsInput.output, out.input);
+ }
+</code></pre>
+
+<p>The DAG consists of only 2 operators: the <code>JMSStringInputOperator</code> which is the input operator that feeds received SQS messages into the output operator <code>LineOutputOperator</code> which outputs these messages into a file or files. The code also shows how the AWS/SQS credentials are initialized in the factory builder. </p>
+<p>For SQS you will have to provide a custom connectionFactoryBuilder as shown in the example above and in <a href="https://github.com/awslabs/amazon-sqs-java-messaging-lib/blob/master/src/main/java/com/amazon/sqs/javamessaging/SQSConnectionFactory.java">SQSConnectionFactory.java</a>. The builder is typically used to supply AWS region and credential information that cannot be supplied via any JMS interfaces.</p>
+<p>The following code snippet shows a typical Builder implementation that can be supplied to the operator. The AWS credentials are supplied via a <a href="http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/PropertiesFileCredentialsProvider.html">PropertiesFileCredentialsProvider</a> object in which sqsCredsFilename is the fully qualified path to a properties file from which the AWS security credentials are to be loaded. For example <code>/etc/somewhere/credentials.properties</code></p>
+<pre><code class="java">static class MyConnectionFactoryBuilder implements JMSBase.ConnectionFactoryBuilder {
+
+String sqsCredsFilename;
+
+MyConnectionFactoryBuilder()
+{
+}
+
+@Override
+public ConnectionFactory buildConnectionFactory() 
+{
+  // Create the connection factory using the properties file credential provider.
+  // Connections this factory creates can talk to the queues in us-east-1 region. 
+  SQSConnectionFactory connectionFactory =
+    SQSConnectionFactory.builder()
+      .withRegion(Region.getRegion(Regions.US_EAST_1))
+      .withAWSCredentialsProvider(new PropertiesFileCredentialsProvider(sqsCredsFilename))
+      .build();
+    return connectionFactory;
+  }
+}
+</code></pre>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
+      
+        <a href="../file_splitter/" class="btn btn-neutral float-right" title="File Splitter">Next <span class="icon icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../kafkaInputOperator/" class="btn btn-neutral" title="Kafka Input"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+	  
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../kafkaInputOperator/" style="color: #fcfcfc;">&laquo; Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../file_splitter/" style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/apex-site/blob/d7a21b68/content/docs/malhar-3.6/operators/jsonFormatter/index.html
----------------------------------------------------------------------
diff --git a/content/docs/malhar-3.6/operators/jsonFormatter/index.html b/content/docs/malhar-3.6/operators/jsonFormatter/index.html
new file mode 100644
index 0000000..95241f3
--- /dev/null
+++ b/content/docs/malhar-3.6/operators/jsonFormatter/index.html
@@ -0,0 +1,407 @@
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  
+  
+  
+  <title>Json Formatter - Apache Apex Malhar Documentation</title>
+  
+
+  <link rel="shortcut icon" href="../../favicon.ico">
+  
+
+  
+  <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'>
+
+  <link rel="stylesheet" href="../../css/theme.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" />
+  <link rel="stylesheet" href="../../css/highlight.css">
+
+  
+  <script>
+    // Current page data
+    var mkdocs_page_name = "Json Formatter";
+    var mkdocs_page_input_path = "operators/jsonFormatter.md";
+    var mkdocs_page_url = "/operators/jsonFormatter/";
+  </script>
+  
+  <script src="../../js/jquery-2.1.1.min.js"></script>
+  <script src="../../js/modernizr-2.8.3.min.js"></script>
+  <script type="text/javascript" src="../../js/highlight.pack.js"></script>
+  <script src="../../js/theme.js"></script> 
+
+  
+</head>
+
+<body class="wy-body-for-nav" role="document">
+
+  <div class="wy-grid-for-nav">
+
+    
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav">
+      <div class="wy-side-nav-search">
+        <a href="../.." class="icon icon-home"> Apache Apex Malhar Documentation</a>
+        <div role="search">
+  <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+  </form>
+</div>
+      </div>
+
+      <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
+        <ul class="current">
+          
+            <li>
+    <li class="toctree-l1 ">
+        <a class="" href="../..">Apache Apex Malhar</a>
+        
+    </li>
+<li>
+          
+            <li>
+    <ul class="subnav">
+    <li><span>Operators</span></li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../kafkaInputOperator/">Kafka Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jmsInputOperator/">JMS Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_splitter/">File Splitter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../block_reader/">Block Reader</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../fsInputOperator/">File Input</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../csvParserOperator/">Csv Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../file_output/">File Output</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../enricher/">Enricher</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../filter/">Filter</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../deduper/">Deduper</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../windowedOperator/">Windowed Operator</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../jsonParser/">Json Parser</a>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 current">
+        <a class="current" href="./">Json Formatter</a>
+        
+            <ul>
+            
+                <li class="toctree-l3"><a href="#json-formatter">Json Formatter</a></li>
+                
+                    <li><a class="toctree-l4" href="#operator-objective">Operator Objective</a></li>
+                
+                    <li><a class="toctree-l4" href="#class-diagram">Class Diagram</a></li>
+                
+                    <li><a class="toctree-l4" href="#operator-information">Operator Information</a></li>
+                
+                    <li><a class="toctree-l4" href="#properties-attributes-and-ports">Properties, Attributes and Ports</a></li>
+                
+                    <li><a class="toctree-l4" href="#partitioning">Partitioning</a></li>
+                
+                    <li><a class="toctree-l4" href="#example">Example</a></li>
+                
+                    <li><a class="toctree-l4" href="#advance-features">Advance Features</a></li>
+                
+            
+            </ul>
+        
+    </li>
+
+        
+            
+    <li class="toctree-l1 ">
+        <a class="" href="../transform/">Transform Operator</a>
+        
+    </li>
+
+        
+    </ul>
+<li>
+          
+        </ul>
+      </div>
+      &nbsp;
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
+
+      
+      <nav class="wy-nav-top" role="navigation" aria-label="top navigation">
+        <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+        <a href="../..">Apache Apex Malhar Documentation</a>
+      </nav>
+
+      
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+    <li><a href="../..">Docs</a> &raquo;</li>
+    
+      
+        
+          <li>Operators &raquo;</li>
+        
+      
+    
+    <li>Json Formatter</li>
+    <li class="wy-breadcrumbs-aside">
+      
+    </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main">
+            <div class="section">
+              
+                <h1 id="json-formatter">Json Formatter</h1>
+<h2 id="operator-objective">Operator Objective</h2>
+<p>The purpose of JsonFormatter is to consume Plain Old Java Objects ("POJOs") and write them out as JSON.
+Json Formatter is <strong>idempotent</strong>, <strong>fault-tolerant</strong> &amp; <strong>statically/dynamically partitionable</strong>.</p>
+<h2 id="class-diagram">Class Diagram</h2>
+<p><img alt="JsonFormatter" src="../images/jsonFormatter/JsonFormatter.png" /></p>
+<h2 id="operator-information">Operator Information</h2>
+<ol>
+<li>Operator location:<strong><em>malhar-library</em></strong></li>
+<li>Available since:<strong><em>3.2.0</em></strong></li>
+<li>Operator state:<strong><em>Evolving</em></strong></li>
+<li>Java Package:<a href="https://github.com/apache/apex-malhar/blob/master/library/src/main/java/com/datatorrent/lib/formatter/JsonFormatter.java">com.datatorrent.lib.formatter.JsonFormatter</a></li>
+</ol>
+<h2 id="properties-attributes-and-ports">Properties, Attributes and Ports</h2>
+<h3 id="platform-attributes-that-influences-operator-behavior">Platform Attributes that influence operator behavior</h3>
+<table>
+<thead>
+<tr>
+<th><strong>Attribute</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>in.TUPLE_CLASS</em></td>
+<td>TUPLE_CLASS attribute on input port which tells operator the class of incoming POJO</td>
+<td>Class or FQCN</td>
+<td>Yes</td>
+</tr>
+</tbody>
+</table>
+<h3 id="ports">Ports</h3>
+<table>
+<thead>
+<tr>
+<th><strong>Port</strong></th>
+<th><strong>Description</strong></th>
+<th><strong>Type</strong></th>
+<th><strong>Mandatory</strong></th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><em>in</em></td>
+<td>Tuples that need to be formatted are received on this port</td>
+<td>Object (POJO)</td>
+<td>Yes</td>
+</tr>
+<tr>
+<td><em>out</em></td>
+<td>Valid Tuples that are emitted as JSON</td>
+<td>String</td>
+<td>No</td>
+</tr>
+<tr>
+<td><em>err</em></td>
+<td>Invalid Tuples are emitted on this port</td>
+<td>Object</td>
+<td>No</td>
+</tr>
+</tbody>
+</table>
+<h2 id="partitioning">Partitioning</h2>
+<p>JSON Formatter is both statically and dynamically partitionable.</p>
+<h3 id="static-partitioning">Static Partitioning</h3>
+<p>This can be achieved in 2 ways</p>
+<ol>
+<li>Specifying the partitioner and number of partitions in the populateDAG() method</li>
+</ol>
+<pre><code class="java">JsonFormatter jsonFormatter = dag.addOperator(&quot;jsonFormatter&quot;, JsonFormatter.class);
+StatelessPartitioner&lt;JsonFormatter&gt; partitioner1 = new StatelessPartitioner&lt;JsonFormatter&gt;(2);
+dag.setAttribute(jsonFormatter, Context.OperatorContext.PARTITIONER, partitioner1 );
+</code></pre>
+
+<ol start="2">
+<li>Specifying the partitioner in properties file.</li>
+</ol>
+<pre><code class="xml"> &lt;property&gt;
+   &lt;name&gt;dt.operator.{OperatorName}.attr.PARTITIONER&lt;/name&gt;
+   &lt;value&gt;com.datatorrent.common.partitioner.StatelessPartitioner:2&lt;/value&gt;
+ &lt;/property&gt;
+</code></pre>
+
+<p>where {OperatorName} is the name of the JsonFormatter operator.
+ The above lines will statically partition JsonFormatter into 2 partitions. The value after the colon can be changed to change the number of static partitions.</p>
+<h3 id="dynamic-paritioning">Dynamic Partitioning</h3>
+<p>JsonFormatter can be dynamically partitioned using an out-of-the-box partitioner:</p>
+<h4 id="throughput-based">Throughput based</h4>
+<p>Following code can be added to populateDAG method of application to dynamically partition JsonFormatter:</p>
+<pre><code class="java">JsonFormatter jsonFormatter = dag.addOperator(&quot;jsonFormatter&quot;, JsonFormatter.class);
+StatelessThroughputBasedPartitioner&lt;JsonFormatter&gt; partitioner = new StatelessThroughputBasedPartitioner&lt;&gt;();
+partitioner.setCooldownMillis(conf.getLong(COOL_DOWN_MILLIS, 10000));
+partitioner.setMaximumEvents(conf.getLong(MAX_THROUGHPUT, 30000));
+partitioner.setMinimumEvents(conf.getLong(MIN_THROUGHPUT, 10000));
+dag.setAttribute(jsonFormatter, OperatorContext.STATS_LISTENERS, Arrays.asList(new StatsListener[]{partitioner}));
+dag.setAttribute(jsonFormatter, OperatorContext.PARTITIONER, partitioner);
+</code></pre>
+
+<p>The above code will dynamically partition JsonFormatter when the throughput changes.
+If the overall throughput of JsonFormatter rises above 30000 or falls below 10000, the platform will repartition JsonFormatter
+to keep the throughput of a single partition between 10000 and 30000.
+A CooldownMillis of 10000 will be used as the threshold time for which the throughput change is observed.</p>
+<h2 id="example">Example</h2>
+<p>Example for Json Formatter can be found at: <a href="https://github.com/DataTorrent/examples/tree/master/tutorials/parser">https://github.com/DataTorrent/examples/tree/master/tutorials/parser</a></p>
+<h2 id="advance-features">Advance Features</h2>
+<p>JsonFormatter is based on <a href="https://github.com/FasterXML/jackson-databind">jackson-databind</a> and so users can make use of <a href="https://github.com/FasterXML/jackson-annotations">annotations</a> in the POJO class. Here are a few annotations that are relevant while using JsonFormatter:</p>
+<ol><li><strong>@JsonProperty</strong> : Sometimes POJOs contain properties that should be written under a different name in the output JSON. You can specify names as:</li></ol>
+<pre><code class="java">public class Ad{
+  @JsonProperty(&quot;desc&quot;)
+  public String description;
+  public List&lt;String&gt; sizes;
+}
+</code></pre>
+
+<ol start="2">
+<li><strong>@JsonIgnore</strong> : Sometimes POJOs contain properties that you do not want to write out, so you can do:</li>
+</ol>
+<pre><code class="java">public class Value {
+  public int value;
+  @JsonIgnore
+  public int internalValue;
+}
+</code></pre>
+
+<ol start="3">
+<li><strong>@JsonFormat</strong> :  Sometimes Date fields need to be printed in custom format, so you can do:</li>
+</ol>
+<pre><code class="java">public class Ad{
+  @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = &quot;EEE, d MMM yyyy HH:mm:ss&quot;)
+   public Date startDate;
+}
+</code></pre>
+              
+            </div>
+          </div>
+          <footer>
+  
+    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
+      
+        <a href="../transform/" class="btn btn-neutral float-right" title="Transform Operator">Next <span class="icon icon-circle-arrow-right"></span></a>
+      
+      
+        <a href="../jsonParser/" class="btn btn-neutral" title="Json Parser"><span class="icon icon-circle-arrow-left"></span> Previous</a>
+      
+    </div>
+  
+
+  <hr/>
+
+  <div role="contentinfo">
+    <!-- Copyright etc -->
+    
+  </div>
+
+  Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
+</footer>
+	  
+        </div>
+      </div>
+
+    </section>
+
+  </div>
+
+<div class="rst-versions" role="note" style="cursor: pointer">
+    <span class="rst-current-version" data-toggle="rst-current-version">
+      
+      
+        <span><a href="../jsonParser/" style="color: #fcfcfc;">&laquo; Previous</a></span>
+      
+      
+        <span style="margin-left: 15px"><a href="../transform/" style="color: #fcfcfc">Next &raquo;</a></span>
+      
+    </span>
+</div>
+
+</body>
+</html>


Mime
View raw message