orc-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject [4/6] orc git commit: Pushing update for ORC-65
Date Sat, 11 Jun 2016 18:15:56 GMT
http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/hive-ddl.html
----------------------------------------------------------------------
diff --git a/docs/hive-ddl.html b/docs/hive-ddl.html
index 1d22e26..02a58c8 100644
--- a/docs/hive-ddl.html
+++ b/docs/hive-ddl.html
@@ -105,25 +105,19 @@
     
   
     
-      <option value="/docs/index.html">Background</option>
-    
   
     
   
     
   
     
-  
+      <option value="/docs/index.html">Background</option>
     
   
     
   
     
   
-
-  
-
-  
     
   
     
@@ -140,21 +134,23 @@
   
     
   
-    
+
   
-    
+
   
     
   
     
+      <option value="/docs/adopters.html">ORC Adopters</option>
+    
   
     
-      <option value="/docs/types.html">Types</option>
+  
     
   
-
+    
   
-
+    
   
     
   
@@ -171,7 +167,7 @@
     
   
     
-      <option value="/docs/indexes.html">Indexes</option>
+  
     
   
     
@@ -189,8 +185,6 @@
 
   
     
-      <option value="/docs/acid.html">ACID support</option>
-    
   
     
   
@@ -216,16 +210,7 @@
   
     
   
-
-
-    </optgroup>
     
-    <optgroup label="Hive Usage">
-      
-
-
-  
-
   
     
   
@@ -238,12 +223,12 @@
     
   
     
-      <option value="/docs/hive-ddl.html">Hive DDL</option>
+      <option value="/docs/types.html">Types</option>
     
   
-    
+
   
-    
+
   
     
   
@@ -255,9 +240,7 @@
   
     
   
-
-  
-
+    
   
     
   
@@ -268,7 +251,7 @@
     
   
     
-      <option value="/docs/hive-config.html">Hive Configuration</option>
+      <option value="/docs/indexes.html">Indexes</option>
     
   
     
@@ -287,19 +270,14 @@
   
     
   
-
-
-    </optgroup>
     
-    <optgroup label="Format Specification">
-      
-
+  
 
   
 
   
     
-  
+      <option value="/docs/acid.html">ACID support</option>
     
   
     
@@ -319,17 +297,11 @@
     
   
     
-      <option value="/docs/spec-intro.html">Introduction</option>
-    
   
     
   
     
   
-
-  
-
-  
     
   
     
@@ -337,8 +309,6 @@
     
   
     
-      <option value="/docs/file-tail.html">File Tail</option>
-    
   
     
   
@@ -348,24 +318,35 @@
   
     
   
+
+
+    </optgroup>
     
+    <optgroup label="Installing">
+      
+
+
+  
+
   
     
   
     
   
     
+      <option value="/docs/building.html">Building ORC</option>
+    
   
     
   
-
+    
   
-
+    
   
     
   
     
-      <option value="/docs/compression.html">Compression</option>
+  
     
   
     
@@ -411,8 +392,6 @@
     
   
     
-      <option value="/docs/run-length.html">Run Length Encoding</option>
-    
   
     
   
@@ -422,12 +401,8 @@
   
     
   
-
-  
-
-  
     
-  
+      <option value="/docs/releases.html">Releases</option>
     
   
     
@@ -442,28 +417,37 @@
   
     
   
+
+
+    </optgroup>
     
+    <optgroup label="Using in Hive">
+      
+
+
   
-    
+
   
     
   
     
-      <option value="/docs/stripes.html">Stripes</option>
+  
     
   
     
   
-
+    
   
-
+    
   
     
   
     
   
     
-      <option value="/docs/encodings.html">Column Encodings</option>
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
     
   
     
@@ -505,12 +489,10 @@
     
   
     
-  
+      <option value="/docs/hive-config.html">Hive Configuration</option>
     
   
     
-      <option value="/docs/spec-index.html">Indexes</option>
-    
   
     
   
@@ -518,106 +500,35 @@
   
     
   
-
-
-    </optgroup>
     
-  </select>
-</div>
-
-
-      <div class="unit four-fifths">
-        <article>
-          <h1>Hive DDL</h1>
-          <p>ORC is well integrated into Hive, so storing your istari table as ORC
-is done by adding “STORED AS ORC”.</p>
-
-<p><code>CREATE TABLE istari (
-  name STRING,
-  color STRING
-) STORED AS ORC;
-</code></p>
-
-<p>To modify a table so that new partitions of the istari table are
-stored as ORC files:</p>
-
-<p><code>ALTER TABLE istari SET FILEFORMAT ORC;
-</code></p>
-
-<p>As of Hive 0.14, users can request an efficient merge of small ORC files
-together by issuing a CONCATENATE command on their table or partition. The
-files will be merged at the stripe level without reserialization.</p>
-
-<p><code>ALTER TABLE istari [PARTITION partition_spec] CONCATENATE;
-</code></p>
-
-<p>To get information about an ORC file, use the orcfiledump command.</p>
-
-<p><code>% hive --orcfiledump &lt;path_to_file&gt;
-</code></p>
-
-<p>As of Hive 1.1, to display the data in the ORC file, use:</p>
-
-<p><code>% hive --orcfiledump -d &lt;path_to_file&gt;
-</code></p>
-
-          
-
-
-
-
-
   
+    
   
-
-  
-  
-
-  
+    
   
-
+    
   
+    
   
-
+    
   
+    
   
-    <div class="section-nav">
-      <div class="left align-right">
-          
-            
-            
-            <a href="/docs/acid.html" class="prev">Back</a>
-          
-      </div>
-      <div class="right align-left">
-          
-            
-            
-            <a href="/docs/hive-config.html" class="next">Next</a>
-          
-      </div>
-    </div>
-    <div class="clear"></div>
     
+  
 
-        </article>
-      </div>
 
-      <div class="unit one-fifth hide-on-mobiles">
-  <aside>
-    
-    <h4>Overview</h4>
+    </optgroup>
     
+    <optgroup label="Using in MapReduce">
+      
 
-<ul>
 
   
 
   
     
   
-
-  
     
   
     
@@ -631,23 +542,15 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
-      <li class=""><a href="/docs/index.html">Background</a></li>
-      
-
-
-  
-
   
     
   
-
-  
     
   
     
   
     
-  
+      <option value="/docs/mapred.html">Using in MapRed</option>
     
   
     
@@ -666,18 +569,12 @@ files will be merged at the stripe level without reserializatoin.</p>
   
     
   
-    
-      <li class=""><a href="/docs/types.html">Types</a></li>
-      
-
 
   
 
   
     
   
-
-  
     
   
     
@@ -693,36 +590,20 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
-      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
-      
-
-
-  
-
   
     
   
-
+    
   
     
-      <li class=""><a href="/docs/acid.html">ACID support</a></li>
-      
-
-
-</ul>
-
+  
     
-    <h4>Hive Usage</h4>
+      <option value="/docs/mapreduce.html">Using in MapReduce</option>
     
-
-<ul>
-
-  
-
   
     
   
-
+    
   
     
   
@@ -734,8 +615,11 @@ files will be merged at the stripe level without reserializatoin.</p>
   
     
   
+
+
+    </optgroup>
     
-      <li class="current"><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+    <optgroup label="Using ORC Core">
       
 
 
@@ -744,10 +628,6 @@ files will be merged at the stripe level without reserializatoin.</p>
   
     
   
-
-  
-    
-  
     
   
     
@@ -755,24 +635,16 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
-      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
-      
-
-
-</ul>
-
+      <option value="/docs/core-java.html">Using Core Java</option>
     
-    <h4>Format Specification</h4>
+  
     
-
-<ul>
-
   
-
+    
   
     
   
-
+    
   
     
   
@@ -795,8 +667,965 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
-      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
-      
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Tools">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/tools.html">Tools</option>
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>Hive DDL</h1>
+          <p>ORC is well integrated into Hive, so storing your istari table as ORC
+is done by adding “STORED AS ORC”.</p>
+
+<p><code>CREATE TABLE istari (
+  name STRING,
+  color STRING
+) STORED AS ORC;
+</code></p>
+
+<p>To modify a table so that new partitions of the istari table are
+stored as ORC files:</p>
+
+<p><code>ALTER TABLE istari SET FILEFORMAT ORC;
+</code></p>
+
+<p>As of Hive 0.14, users can request an efficient merge of small ORC files
+together by issuing a CONCATENATE command on their table or partition. The
+files will be merged at the stripe level without reserialization.</p>
+
+<p><code>ALTER TABLE istari [PARTITION partition_spec] CONCATENATE;
+</code></p>
+
+<p>To get information about an ORC file, use the orcfiledump command.</p>
+
+<p><code>% hive --orcfiledump &lt;path_to_file&gt;
+</code></p>
+
+<p>As of Hive 1.1, to display the data in the ORC file, use:</p>
+
+<p><code>% hive --orcfiledump -d &lt;path_to_file&gt;
+</code></p>
+
+          
+
+
+
+
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/releases.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/hive-config.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Installing</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/building.html">Building ORC</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/releases.html">Releases</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in Hive</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class="current"><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in MapReduce</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using ORC Core</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Tools</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/tools.html">Tools</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
 
 
   
@@ -813,6 +1642,12 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
       
 
@@ -827,6 +1662,10 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
+  
+    
+  
+    
       <li class=""><a href="/docs/compression.html">Compression</a></li>
       
 
@@ -855,6 +1694,18 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
       
 
@@ -889,6 +1740,18 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/stripes.html">Stripes</a></li>
       
 
@@ -905,6 +1768,12 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
       
 
@@ -935,6 +1804,18 @@ files will be merged at the stripe level without reserializatoin.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
       
 

http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/index.html
----------------------------------------------------------------------
diff --git a/docs/index.html b/docs/index.html
index f2b43fc..906c346 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -105,25 +105,19 @@
     
   
     
-      <option value="/docs/index.html">Background</option>
-    
   
     
   
     
   
     
-  
+      <option value="/docs/index.html">Background</option>
     
   
     
   
     
   
-
-  
-
-  
     
   
     
@@ -140,21 +134,23 @@
   
     
   
-    
+
   
-    
+
   
     
   
     
+      <option value="/docs/adopters.html">ORC Adopters</option>
+    
   
     
-      <option value="/docs/types.html">Types</option>
+  
     
   
-
+    
   
-
+    
   
     
   
@@ -171,7 +167,7 @@
     
   
     
-      <option value="/docs/indexes.html">Indexes</option>
+  
     
   
     
@@ -189,8 +185,6 @@
 
   
     
-      <option value="/docs/acid.html">ACID support</option>
-    
   
     
   
@@ -216,16 +210,7 @@
   
     
   
-
-
-    </optgroup>
     
-    <optgroup label="Hive Usage">
-      
-
-
-  
-
   
     
   
@@ -238,12 +223,12 @@
     
   
     
-      <option value="/docs/hive-ddl.html">Hive DDL</option>
+      <option value="/docs/types.html">Types</option>
     
   
-    
+
   
-    
+
   
     
   
@@ -255,9 +240,7 @@
   
     
   
-
-  
-
+    
   
     
   
@@ -268,7 +251,7 @@
     
   
     
-      <option value="/docs/hive-config.html">Hive Configuration</option>
+      <option value="/docs/indexes.html">Indexes</option>
     
   
     
@@ -287,19 +270,14 @@
   
     
   
-
-
-    </optgroup>
     
-    <optgroup label="Format Specification">
-      
-
+  
 
   
 
   
     
-  
+      <option value="/docs/acid.html">ACID support</option>
     
   
     
@@ -319,17 +297,11 @@
     
   
     
-      <option value="/docs/spec-intro.html">Introduction</option>
-    
   
     
   
     
   
-
-  
-
-  
     
   
     
@@ -337,8 +309,6 @@
     
   
     
-      <option value="/docs/file-tail.html">File Tail</option>
-    
   
     
   
@@ -348,24 +318,35 @@
   
     
   
+
+
+    </optgroup>
     
+    <optgroup label="Installing">
+      
+
+
+  
+
   
     
   
     
   
     
+      <option value="/docs/building.html">Building ORC</option>
+    
   
     
   
-
+    
   
-
+    
   
     
   
     
-      <option value="/docs/compression.html">Compression</option>
+  
     
   
     
@@ -411,8 +392,6 @@
     
   
     
-      <option value="/docs/run-length.html">Run Length Encoding</option>
-    
   
     
   
@@ -422,12 +401,8 @@
   
     
   
-
-  
-
-  
     
-  
+      <option value="/docs/releases.html">Releases</option>
     
   
     
@@ -442,28 +417,37 @@
   
     
   
+
+
+    </optgroup>
     
+    <optgroup label="Using in Hive">
+      
+
+
   
-    
+
   
     
   
     
-      <option value="/docs/stripes.html">Stripes</option>
+  
     
   
     
   
-
+    
   
-
+    
   
     
   
     
   
     
-      <option value="/docs/encodings.html">Column Encodings</option>
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
     
   
     
@@ -505,11 +489,13 @@
     
   
     
+      <option value="/docs/hive-config.html">Hive Configuration</option>
+    
   
     
   
     
-      <option value="/docs/spec-index.html">Indexes</option>
+  
     
   
     
@@ -518,92 +504,31 @@
   
     
   
-
-
-    </optgroup>
     
-  </select>
-</div>
-
-
-      <div class="unit four-fifths">
-        <article>
-          <h1>Background</h1>
-          <p>Back in January 2013, we created ORC files as part of the initiative
-to massively speed up Apache Hive and improve the storage efficiency
-of data stored in Apache Hadoop. The focus was on enabling high speed
-processing and reducing file sizes.</p>
-
-<p>ORC is a self-describing type-aware columnar file format designed for 
-Hadoop workloads. It is optimized for large streaming reads, but with
-integrated support for finding required rows quickly. Storing data in
-a columnar format lets the reader read, decompress, and process only
-the values that are required for the current query. Because ORC files
-are type-aware, the writer chooses the most appropriate encoding for
-the type and builds an internal index as the file is written.</p>
-
-<p>Predicate pushdown uses those indexes to determine which stripes in a
-file need to be read for a particular query and the row indexes can
-narrow the search to a particular set of 10,000 rows. ORC supports the
-complete set of types in Hive, including the complex types: structs,
-lists, maps, and unions.</p>
-
-<p>Many large Hadoop users have adopted ORC. For instance, Facebook uses
-ORC to <a href="http://s.apache.org/fb-scaling-300-pb">save tens of petabytes</a>
-in their data warehouse and demonstrated that ORC is <a href="http://s.apache.org/presto-orc">significantly
-faster</a> than RC File or Parquet. Yahoo
-uses ORC to store their production data and has released some of their
-<a href="http://s.apache.org/yahoo-orc">benchmark results</a>.</p>
-
-<p>ORC files are divided into <em>stripes</em> that are roughly 64MB by
-default. The stripes in a file are independent of each other and form
-the natural unit of distributed work. Within each stripe, the columns
-are separated from each other so the reader can read just the columns
-that are required.</p>
-
-          
-
-
-
-
-
   
+    
+  
+    
+  
+    
+  
+    
   
-    <div class="section-nav">
-      <div class="left align-right">
-          
-            <span class="prev disabled">Back</span>
-          
-      </div>
-      <div class="right align-left">
-          
-            
-            
-            <a href="/docs/types.html" class="next">Next</a>
-          
-      </div>
-    </div>
-    <div class="clear"></div>
     
+  
 
-        </article>
-      </div>
 
-      <div class="unit one-fifth hide-on-mobiles">
-  <aside>
-    
-    <h4>Overview</h4>
+    </optgroup>
     
+    <optgroup label="Using in MapReduce">
+      
 
-<ul>
 
   
 
   
     
   
-
-  
     
   
     
@@ -617,23 +542,15 @@ that are required.</p>
     
   
     
-      <li class="current"><a href="/docs/index.html">Background</a></li>
-      
-
-
-  
-
   
     
   
-
-  
     
   
     
   
     
-  
+      <option value="/docs/mapred.html">Using in MapRed</option>
     
   
     
@@ -652,18 +569,12 @@ that are required.</p>
   
     
   
-    
-      <li class=""><a href="/docs/types.html">Types</a></li>
-      
-
 
   
 
   
     
   
-
-  
     
   
     
@@ -679,36 +590,20 @@ that are required.</p>
     
   
     
-      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
-      
-
-
-  
-
   
     
   
-
+    
   
     
-      <li class=""><a href="/docs/acid.html">ACID support</a></li>
-      
-
-
-</ul>
-
+  
     
-    <h4>Hive Usage</h4>
+      <option value="/docs/mapreduce.html">Using in MapReduce</option>
     
-
-<ul>
-
-  
-
   
     
   
-
+    
   
     
   
@@ -720,8 +615,11 @@ that are required.</p>
   
     
   
+
+
+    </optgroup>
     
-      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+    <optgroup label="Using ORC Core">
       
 
 
@@ -730,8 +628,6 @@ that are required.</p>
   
     
   
-
-  
     
   
     
@@ -739,26 +635,18 @@ that are required.</p>
     
   
     
-  
+      <option value="/docs/core-java.html">Using Core Java</option>
     
-      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
-      
-
-
-</ul>
-
+  
     
-    <h4>Format Specification</h4>
+  
     
-
-<ul>
-
   
-
+    
   
     
   
-
+    
   
     
   
@@ -780,8 +668,11 @@ that are required.</p>
   
     
   
+
+
+    </optgroup>
     
-      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+    <optgroup label="Tools">
       
 
 
@@ -790,7 +681,7 @@ that are required.</p>
   
     
   
-
+    
   
     
   
@@ -799,16 +690,955 @@ that are required.</p>
     
   
     
-      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
-      
-
-
   
-
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/tools.html">Tools</option>
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>Background</h1>
+          <p>Back in January 2013, we created ORC files as part of the initiative
+to massively speed up Apache Hive and improve the storage efficiency
+of data stored in Apache Hadoop. The focus was on enabling high speed
+processing and reducing file sizes.</p>
+
+<p>ORC is a self-describing type-aware columnar file format designed for 
+Hadoop workloads. It is optimized for large streaming reads, but with
+integrated support for finding required rows quickly. Storing data in
+a columnar format lets the reader read, decompress, and process only
+the values that are required for the current query. Because ORC files
+are type-aware, the writer chooses the most appropriate encoding for
+the type and builds an internal index as the file is written.</p>
+
+<p>Predicate pushdown uses those indexes to determine which stripes in a
+file need to be read for a particular query and the row indexes can
+narrow the search to a particular set of 10,000 rows. ORC supports the
+complete set of types in Hive, including the complex types: structs,
+lists, maps, and unions.</p>
+
+<p>Many large Hadoop users have adopted ORC. For instance, Facebook uses
+ORC to <a href="http://s.apache.org/fb-scaling-300-pb">save tens of petabytes</a>
+in their data warehouse and demonstrated that ORC is <a href="http://s.apache.org/presto-orc">significantly
+faster</a> than RC File or Parquet. Yahoo
+uses ORC to store their production data and has released some of their
+<a href="http://s.apache.org/yahoo-orc">benchmark results</a>.</p>
+
+<p>ORC files are divided into <em>stripes</em> that are roughly 64MB by
+default. The stripes in a file are independent of each other and form
+the natural unit of distributed work. Within each stripe, the columns
+are separated from each other so the reader can read just the columns
+that are required.</p>
+
+          
+
+
+
+
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            <span class="prev disabled">Back</span>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/adopters.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class="current"><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Installing</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/building.html">Building ORC</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/releases.html">Releases</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in Hive</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in MapReduce</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
   
     
   
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using ORC Core</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Tools</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/tools.html">Tools</a></li>
+      
+
 
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
   
     
   
@@ -841,6 +1671,18 @@ that are required.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
       
 
@@ -875,6 +1717,18 @@ that are required.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/stripes.html">Stripes</a></li>
       
 
@@ -891,6 +1745,12 @@ that are required.</p>
     
   
     
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
       
 
@@ -921,6 +1781,18 @@ that are required.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
       
 

http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/indexes.html
----------------------------------------------------------------------
diff --git a/docs/indexes.html b/docs/indexes.html
index cfbd69d..acd040b 100644
--- a/docs/indexes.html
+++ b/docs/indexes.html
@@ -105,25 +105,19 @@
     
   
     
-      <option value="/docs/index.html">Background</option>
-    
   
     
   
     
   
     
-  
+      <option value="/docs/index.html">Background</option>
     
   
     
   
     
   
-
-  
-
-  
     
   
     
@@ -140,21 +134,23 @@
   
     
   
-    
+
   
-    
+
   
     
   
     
+      <option value="/docs/adopters.html">ORC Adopters</option>
+    
   
     
-      <option value="/docs/types.html">Types</option>
+  
     
   
-
+    
   
-
+    
   
     
   
@@ -171,7 +167,7 @@
     
   
     
-      <option value="/docs/indexes.html">Indexes</option>
+  
     
   
     
@@ -189,8 +185,6 @@
 
   
     
-      <option value="/docs/acid.html">ACID support</option>
-    
   
     
   
@@ -216,16 +210,7 @@
   
     
   
-
-
-    </optgroup>
     
-    <optgroup label="Hive Usage">
-      
-
-
-  
-
   
     
   
@@ -238,12 +223,12 @@
     
   
     
-      <option value="/docs/hive-ddl.html">Hive DDL</option>
+      <option value="/docs/types.html">Types</option>
     
   
-    
+
   
-    
+
   
     
   
@@ -255,9 +240,7 @@
   
     
   
-
-  
-
+    
   
     
   
@@ -268,7 +251,7 @@
     
   
     
-      <option value="/docs/hive-config.html">Hive Configuration</option>
+      <option value="/docs/indexes.html">Indexes</option>
     
   
     
@@ -287,19 +270,14 @@
   
     
   
-
-
-    </optgroup>
     
-    <optgroup label="Format Specification">
-      
-
+  
 
   
 
   
     
-  
+      <option value="/docs/acid.html">ACID support</option>
     
   
     
@@ -319,17 +297,11 @@
     
   
     
-      <option value="/docs/spec-intro.html">Introduction</option>
-    
   
     
   
     
   
-
-  
-
-  
     
   
     
@@ -337,8 +309,6 @@
     
   
     
-      <option value="/docs/file-tail.html">File Tail</option>
-    
   
     
   
@@ -348,24 +318,35 @@
   
     
   
+
+
+    </optgroup>
     
+    <optgroup label="Installing">
+      
+
+
+  
+
   
     
   
     
   
     
+      <option value="/docs/building.html">Building ORC</option>
+    
   
     
   
-
+    
   
-
+    
   
     
   
     
-      <option value="/docs/compression.html">Compression</option>
+  
     
   
     
@@ -411,8 +392,6 @@
     
   
     
-      <option value="/docs/run-length.html">Run Length Encoding</option>
-    
   
     
   
@@ -422,12 +401,8 @@
   
     
   
-
-  
-
-  
     
-  
+      <option value="/docs/releases.html">Releases</option>
     
   
     
@@ -442,28 +417,37 @@
   
     
   
+
+
+    </optgroup>
     
+    <optgroup label="Using in Hive">
+      
+
+
   
-    
+
   
     
   
     
-      <option value="/docs/stripes.html">Stripes</option>
+  
     
   
     
   
-
+    
   
-
+    
   
     
   
     
   
     
-      <option value="/docs/encodings.html">Column Encodings</option>
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
     
   
     
@@ -505,11 +489,11 @@
     
   
     
-  
+      <option value="/docs/hive-config.html">Hive Configuration</option>
     
   
     
-      <option value="/docs/spec-index.html">Indexes</option>
+  
     
   
     
@@ -518,97 +502,33 @@
   
     
   
-
-
-    </optgroup>
     
-  </select>
-</div>
-
-
-      <div class="unit four-fifths">
-        <article>
-          <h1>Indexes</h1>
-          <p>ORC provides three level of indexes within each file:</p>
-
-<ul>
-  <li>file level - statistics about the values in each column across the entire 
-file</li>
-  <li>stripe level - statistics about the values in each column for each stripe</li>
-  <li>row level - statistics about the values in each column for each set of
-10,000 rows within a stripe</li>
-</ul>
-
-<p>The file and stripe level column statistics are in the file footer so
-that they are easy to access to determine if the rest of the file
-needs to be read at all. Row level indexes include both the column
-statistics for each row group and the position for seeking to the
-start of the row group.</p>
-
-<p>Column statistics always contain the count of values and whether there
-are null values present. Most other primitive types include the
-minimum and maximum values and for numeric types the sum. As of Hive
-1.2, the indexes can include bloom filters, which provide a much more
-selective filter.</p>
-
-<p>The indexes at all levels are used by the reader using Search
-ARGuments or SARGs, which are simplified expressions that restrict the
-rows that are of interest. For example, if a query was looking for
-people older than 100 years old, the SARG would be “age &gt; 100” and
-only files, stripes, or row groups that had people over 100 years old
-would be read.</p>
-
-          
-
-
-
-
-
   
+    
   
-
+    
   
+    
   
-
+    
   
+    
   
-    <div class="section-nav">
-      <div class="left align-right">
-          
-            
-            
-            <a href="/docs/types.html" class="prev">Back</a>
-          
-      </div>
-      <div class="right align-left">
-          
-            
-            
-            <a href="/docs/acid.html" class="next">Next</a>
-          
-      </div>
-    </div>
-    <div class="clear"></div>
     
+  
 
-        </article>
-      </div>
 
-      <div class="unit one-fifth hide-on-mobiles">
-  <aside>
-    
-    <h4>Overview</h4>
+    </optgroup>
     
+    <optgroup label="Using in MapReduce">
+      
 
-<ul>
 
   
 
   
     
   
-
-  
     
   
     
@@ -622,23 +542,15 @@ would be read.</p>
     
   
     
-      <li class=""><a href="/docs/index.html">Background</a></li>
-      
-
-
-  
-
   
     
   
-
-  
     
   
     
   
     
-  
+      <option value="/docs/mapred.html">Using in MapRed</option>
     
   
     
@@ -657,18 +569,12 @@ would be read.</p>
   
     
   
-    
-      <li class=""><a href="/docs/types.html">Types</a></li>
-      
-
 
   
 
   
     
   
-
-  
     
   
     
@@ -684,36 +590,20 @@ would be read.</p>
     
   
     
-      <li class="current"><a href="/docs/indexes.html">Indexes</a></li>
-      
-
-
-  
-
   
     
   
-
+    
   
     
-      <li class=""><a href="/docs/acid.html">ACID support</a></li>
-      
-
-
-</ul>
-
+  
     
-    <h4>Hive Usage</h4>
+      <option value="/docs/mapreduce.html">Using in MapReduce</option>
     
-
-<ul>
-
-  
-
   
     
   
-
+    
   
     
   
@@ -725,8 +615,11 @@ would be read.</p>
   
     
   
+
+
+    </optgroup>
     
-      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+    <optgroup label="Using ORC Core">
       
 
 
@@ -735,8 +628,6 @@ would be read.</p>
   
     
   
-
-  
     
   
     
@@ -744,26 +635,18 @@ would be read.</p>
     
   
     
-  
+      <option value="/docs/core-java.html">Using Core Java</option>
     
-      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
-      
-
-
-</ul>
-
+  
     
-    <h4>Format Specification</h4>
+  
     
-
-<ul>
-
   
-
+    
   
     
   
-
+    
   
     
   
@@ -785,8 +668,11 @@ would be read.</p>
   
     
   
+
+
+    </optgroup>
     
-      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+    <optgroup label="Tools">
       
 
 
@@ -795,7 +681,7 @@ would be read.</p>
   
     
   
-
+    
   
     
   
@@ -804,16 +690,963 @@ would be read.</p>
     
   
     
-      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
-      
-
-
   
-
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/tools.html">Tools</option>
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>Indexes</h1>
+          <p>ORC provides three levels of indexes within each file:</p>
+
+<ul>
+  <li>file level - statistics about the values in each column across the entire 
+file</li>
+  <li>stripe level - statistics about the values in each column for each stripe</li>
+  <li>row level - statistics about the values in each column for each set of
+10,000 rows within a stripe</li>
+</ul>
+
+<p>The file and stripe level column statistics are in the file footer so
+that they are easy to access to determine if the rest of the file
+needs to be read at all. Row level indexes include both the column
+statistics for each row group and the position for seeking to the
+start of the row group.</p>
+
+<p>Column statistics always contain the count of values and whether there
+are null values present. Most other primitive types include the
+minimum and maximum values and for numeric types the sum. As of Hive
+1.2, the indexes can include bloom filters, which provide a much more
+selective filter.</p>
+
+<p>The indexes at all levels are used by the reader using Search
+ARGuments or SARGs, which are simplified expressions that restrict the
+rows that are of interest. For example, if a query was looking for
+people older than 100 years old, the SARG would be “age &gt; 100” and
+only files, stripes, or row groups that had people over 100 years old
+would be read.</p>
+
+          
+
+
+
+
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/types.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/acid.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class="current"><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Installing</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/building.html">Building ORC</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/releases.html">Releases</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in Hive</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in MapReduce</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
   
     
   
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/mapred.html">Using in MapRed</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using ORC Core</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Tools</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/tools.html">Tools</a></li>
+      
+
 
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
   
     
   
@@ -846,6 +1679,18 @@ would be read.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
       
 
@@ -880,6 +1725,18 @@ would be read.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/stripes.html">Stripes</a></li>
       
 
@@ -896,6 +1753,12 @@ would be read.</p>
     
   
     
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
       
 
@@ -926,6 +1789,18 @@ would be read.</p>
     
   
     
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
       <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
       
 

http://git-wip-us.apache.org/repos/asf/orc/blob/1eb37b72/docs/mapred.html
----------------------------------------------------------------------
diff --git a/docs/mapred.html b/docs/mapred.html
new file mode 100644
index 0000000..698c2f6
--- /dev/null
+++ b/docs/mapred.html
@@ -0,0 +1,2106 @@
+<!DOCTYPE HTML>
+<html lang="en-US">
+<head>
+  <meta charset="UTF-8">
+  <title>Using in MapRed</title>
+  <meta name="viewport" content="width=device-width,initial-scale=1">
+  <meta name="generator" content="Jekyll v2.4.0">
+  <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+  <link rel="stylesheet" href="/css/screen.css">
+  <link rel="icon" type="image/x-icon" href="/favicon.ico">
+  <!--[if lt IE 9]>
+  <script src="/js/html5shiv.min.js"></script>
+  <script src="/js/respond.min.js"></script>
+  <![endif]-->
+</head>
+
+
+<body class="wrap">
+  <header role="banner">
+  <nav class="mobile-nav show-on-mobiles">
+    <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Doc<span class="show-on-mobiles">s</span>
+                        <span class="hide-on-mobiles">umentation</span></a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+  </nav>
+  <div class="grid">
+    <div class="unit one-third center-on-mobiles">
+      <h1>
+        <a href="/">
+          <span class="sr-only">Apache ORC</span>
+          <img src="/img/logo.png" width="249" height="101" alt="ORC Logo">
+        </a>
+      </h1>
+    </div>
+    <nav class="main-nav unit two-thirds hide-on-mobiles">
+      <ul>
+  <li class="">
+    <a href="/">Home</a>
+  </li>
+  <li class="current">
+    <a href="/docs/">Doc<span class="show-on-mobiles">s</span>
+                        <span class="hide-on-mobiles">umentation</span></a>
+  </li>
+  <li class="">
+    <a href="/talks/">Talks</a>
+  </li>
+  <li class="">
+    <a href="/news/">News</a>
+  </li>
+  <li class="">
+    <a href="/help/">Help</a>
+  </li>
+  <li class="">
+    <a href="/develop/">Develop</a>
+  </li>
+</ul>
+
+    </nav>
+  </div>
+</header>
+
+
+    <section class="docs">
+    <div class="grid">
+
+      <div class="docs-nav-mobile unit whole show-on-mobiles">
+  <select onchange="if (this.value) window.location.href=this.value">
+    <option value="">Navigate the docs…</option>
+    
+    <optgroup label="Overview">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/index.html">Background</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+      <option value="/docs/adopters.html">ORC Adopters</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/types.html">Types</option>
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/indexes.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+      <option value="/docs/acid.html">ACID support</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Installing">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+      <option value="/docs/building.html">Building ORC</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/releases.html">Releases</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Using in Hive">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-ddl.html">Hive DDL</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/hive-config.html">Hive Configuration</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Using in MapReduce">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/mapred.html">Using in MapRed</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/mapreduce.html">Using in MapReduce</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Using ORC Core">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/core-java.html">Using Core Java</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Tools">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/tools.html">Tools</option>
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+    <optgroup label="Format Specification">
+      
+
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-intro.html">Introduction</option>
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/file-tail.html">File Tail</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/compression.html">Compression</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/run-length.html">Run Length Encoding</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/stripes.html">Stripes</option>
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/encodings.html">Column Encodings</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <option value="/docs/spec-index.html">Indexes</option>
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+
+
+    </optgroup>
+    
+  </select>
+</div>
+
+
+      <div class="unit four-fifths">
+        <article>
+          <h1>Using in MapRed</h1>
+          <p>This page describes how to read and write ORC files from Hadoop’s
+older org.apache.hadoop.mapred MapReduce APIs. If you want to use the
+new org.apache.hadoop.mapreduce API, please look at the <a href="/docs/mapreduce.html">next
+page</a>.</p>
+
+<h2 id="reading-orc-files">Reading ORC files</h2>
+
+<p>Add ORC and your desired version of Hadoop to your <code>pom.xml</code>:</p>
+
+<pre><code class="language-xml">&lt;dependencies&gt;
+  &lt;dependency&gt;
+    &lt;groupId&gt;org.apache.orc&lt;/groupId&gt;
+    &lt;artifactId&gt;orc-mapreduce&lt;/artifactId&gt;
+    &lt;version&gt;1.1.0&lt;/version&gt;
+  &lt;/dependency&gt;
+  &lt;dependency&gt;
+    &lt;groupId&gt;org.apache.hadoop&lt;/groupId&gt;
+    &lt;artifactId&gt;hadoop-mapreduce-client-core&lt;/artifactId&gt;
+    &lt;version&gt;2.7.0&lt;/version&gt;
+  &lt;/dependency&gt;
+&lt;/dependencies&gt;
+</code></pre>
+
+<p>Set the minimal properties in your JobConf:</p>
+
+<ul>
+  <li><strong>mapreduce.job.inputformat.class</strong> = org.apache.orc.mapred.OrcInputFormat</li>
+  <li><strong>mapreduce.input.fileinputformat.inputdir</strong> = your input directory</li>
+</ul>
+
+<p>ORC files contain a series of values of the same type and that type
+schema is encoded in the file. Because the ORC files are
+self-describing, the reader always knows how to correctly interpret
+the data. All of the ORC files written by Hive and most of the others have
+a struct as the value type.</p>
+
+<p>Your Mapper class will receive org.apache.hadoop.io.NullWritable as
+the key and a value based on the table below expanded recursively.</p>
+
+<table>
+  <thead>
+    <tr>
+      <th>ORC Type</th>
+      <th>Writable Type</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>array</td>
+      <td>org.apache.orc.mapred.OrcList</td>
+    </tr>
+    <tr>
+      <td>binary</td>
+      <td>org.apache.hadoop.io.BytesWritable</td>
+    </tr>
+    <tr>
+      <td>bigint</td>
+      <td>org.apache.hadoop.io.LongWritable</td>
+    </tr>
+    <tr>
+      <td>boolean</td>
+      <td>org.apache.hadoop.io.BooleanWritable</td>
+    </tr>
+    <tr>
+      <td>char</td>
+      <td>org.apache.hadoop.io.Text</td>
+    </tr>
+    <tr>
+      <td>date</td>
+      <td>org.apache.hadoop.hive.serde2.io.DateWritable</td>
+    </tr>
+    <tr>
+      <td>decimal</td>
+      <td>org.apache.hadoop.hive.serde2.io.HiveDecimalWritable</td>
+    </tr>
+    <tr>
+      <td>double</td>
+      <td>org.apache.hadoop.io.DoubleWritable</td>
+    </tr>
+    <tr>
+      <td>float</td>
+      <td>org.apache.hadoop.io.FloatWritable</td>
+    </tr>
+    <tr>
+      <td>int</td>
+      <td>org.apache.hadoop.io.IntWritable</td>
+    </tr>
+    <tr>
+      <td>map</td>
+      <td>org.apache.orc.mapred.OrcMap</td>
+    </tr>
+    <tr>
+      <td>smallint</td>
+      <td>org.apache.hadoop.io.ShortWritable</td>
+    </tr>
+    <tr>
+      <td>string</td>
+      <td>org.apache.hadoop.io.Text</td>
+    </tr>
+    <tr>
+      <td>struct</td>
+      <td>org.apache.orc.mapred.OrcStruct</td>
+    </tr>
+    <tr>
+      <td>timestamp</td>
+      <td>org.apache.orc.mapred.OrcTimestamp</td>
+    </tr>
+    <tr>
+      <td>tinyint</td>
+      <td>org.apache.hadoop.io.ByteWritable</td>
+    </tr>
+    <tr>
+      <td>uniontype</td>
+      <td>org.apache.orc.mapred.OrcUnion</td>
+    </tr>
+    <tr>
+      <td>varchar</td>
+      <td>org.apache.hadoop.io.Text</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>Let’s assume that your input directory contains ORC files with the
+schema <code>struct&lt;s:string,i:int&gt;</code> and you want to use the string field
+as the key to the MapReduce shuffle and the integer as the value. The
+mapper code would look like:</p>
+
+<pre><code class="language-java">public class MyMapper
+    implements Mapper&lt;NullWritable,OrcStruct,Text,IntWritable&gt; {
+
+  // Input should be: struct&lt;s:string,i:int&gt;
+  public void map(NullWritable key, OrcStruct value,
+                  OutputCollector&lt;Text,IntWritable&gt; output,
+                  Reporter reporter) throws IOException {
+    output.collect((Text) value.getFieldValue(0),
+                   (IntWritable) value.getFieldValue(1));
+  }
+
+  public void configure(JobConf conf) { }
+
+  public void close() { }
+}
+</code></pre>
+
+<h2 id="writing-orc-files">Writing ORC files</h2>
+
+<p>To write ORC files from your MapReduce job, you’ll need to set</p>
+
+<ul>
+  <li><strong>mapreduce.job.outputformat.class</strong> = org.apache.orc.mapred.OrcOutputFormat</li>
+  <li><strong>mapreduce.output.fileoutputformat.outputdir</strong> = your output directory</li>
+  <li><strong>orc.mapred.output.schema</strong> = the schema to write to the ORC file</li>
+</ul>
+
+<p>The reducer needs to create the Writable value to be put into the ORC
+file and typically uses the OrcStruct.createValue(TypeDescription)
+function. For our example, let’s assume that the shuffle types are
+(Text, IntWritable) from the previous section and the reducer should
+gather the integers for each key together and write them as a list. The
+output schema would be <code>struct&lt;key:string,ints:array&lt;int&gt;&gt;</code>. As always
+with MapReduce, if your method stores the values, you need to copy each
+value before getting the next one.</p>
+
+<pre><code class="language-java">public static class MyReducer
+  implements Reducer&lt;Text,IntWritable,NullWritable,OrcStruct&gt; { // gathers the ints of each key into one output struct
+
+  private TypeDescription schema =
+    TypeDescription.fromString("struct&lt;key:string,ints:array&lt;int&gt;&gt;");
+  // createValue creates the correct value type for the schema
+  private OrcStruct pair = (OrcStruct) OrcStruct.createValue(schema);
+  // get a handle to the list of ints (field 1 of the output struct)
+  private OrcList&lt;IntWritable&gt; values =
+    (OrcList&lt;IntWritable&gt;) pair.getFieldValue(1);
+  private final NullWritable nada = NullWritable.get();
+
+  public void reduce(Text key, Iterator&lt;IntWritable&gt; iterator,
+                     OutputCollector&lt;NullWritable, OrcStruct&gt; output,
+                     Reporter reporter) throws IOException {
+    pair.setFieldValue(0, key); // field 0 is the key column of the schema
+    values.clear(); // the list is reused across calls, so drop the previous key's ints
+    while (iterator.hasNext()) {
+      values.add(new IntWritable(iterator.next().get())); // store a copy of each value, not the iterator's object
+    }
+    output.collect(nada, pair); // emit one struct per key with a NullWritable output key
+  }
+
+  public void configure(JobConf conf) { }
+
+  public void close() { }
+}
+</code></pre>
+
+<h2 id="sending-orcstruct-orclist-orcmap-or-orcunion-through-the-shuffle">Sending OrcStruct, OrcList, OrcMap, or OrcUnion through the Shuffle</h2>
+
+<p>In the previous examples, only the Hadoop types were sent through the
+MapReduce shuffle. The complex ORC types, since they are generic
+types, need to have their full type information provided to create the
+object. To enable MapReduce to properly instantiate the OrcStruct and
+other ORC types, we need to wrap them in either an OrcKey for the
+shuffle key or an OrcValue for the shuffle value.</p>
+
+<p>To send two OrcStructs through the shuffle, define the following properties
+in the JobConf:</p>
+
+<ul>
+  <li><strong>mapreduce.map.output.key.class</strong> = org.apache.orc.mapred.OrcKey</li>
+  <li><strong>orc.mapred.map.output.key.schema</strong> = the shuffle key’s schema</li>
+  <li><strong>mapreduce.map.output.value.class</strong> = org.apache.orc.mapred.OrcValue</li>
+  <li><strong>orc.mapred.map.output.value.schema</strong> = the shuffle value’s schema</li>
+</ul>
+
+<p>The mapper just adds an OrcKey and OrcValue around the key and value
+respectively. These objects should be created once and reused as the mapper
+runs.</p>
+
+<pre><code class="language-java">public static class MyMapperWithShuffle
+  implements Mapper&lt;NullWritable,OrcStruct,OrcKey,OrcValue&gt; { // wraps OrcStructs in OrcKey and OrcValue for the shuffle
+
+  // create wrapper objects once; they are reused for every record
+  private OrcKey keyWrapper = new OrcKey();
+  private OrcValue valueWrapper = new OrcValue();
+
+  // create a new structure to pass as the value in the shuffle
+  private OrcStruct outStruct = (OrcStruct) OrcStruct.createValue
+    (TypeDescription.fromString("struct&lt;i:int,j:int&gt;"));
+
+  // get the two fields of the outStruct
+  private IntWritable i = (IntWritable) outStruct.getFieldValue("i");
+  private IntWritable j = (IntWritable) outStruct.getFieldValue("j");
+
+  // Assume the input has type: struct&lt;s:string,i:int&gt;
+  public void map(NullWritable key, OrcStruct value,
+                  OutputCollector&lt;OrcKey,OrcValue&gt; output,
+                  Reporter reporter) throws IOException {
+    keyWrapper.key = value; // the input struct itself is sent as the shuffle key
+    valueWrapper.value = outStruct; // the reused output struct is sent as the shuffle value
+    int val = ((IntWritable) value.getFieldValue("i")).get();
+    i.set(val * 2); // i and j are the field objects fetched from outStruct above
+    j.set(val * val);
+    output.collect(keyWrapper, valueWrapper);
+  }
+
+  public void configure(JobConf conf) { }
+
+  public void close() { }
+}
+</code></pre>
+
+<p>The reducer code accesses the underlying OrcStructs by using the
+OrcKey.key and OrcValue.value fields.</p>
+
+          
+
+
+
+
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+
+  
+  
+    <div class="section-nav">
+      <div class="left align-right">
+          
+            
+            
+            <a href="/docs/hive-config.html" class="prev">Back</a>
+          
+      </div>
+      <div class="right align-left">
+          
+            
+            
+            <a href="/docs/mapreduce.html" class="next">Next</a>
+          
+      </div>
+    </div>
+    <div class="clear"></div>
+    
+
+        </article>
+      </div>
+
+      <div class="unit one-fifth hide-on-mobiles">
+  <aside>
+    
+    <h4>Overview</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/index.html">Background</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+      <li class=""><a href="/docs/adopters.html">ORC Adopters</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/types.html">Types</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/indexes.html">Indexes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+      <li class=""><a href="/docs/acid.html">ACID support</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Installing</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/building.html">Building ORC</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/releases.html">Releases</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in Hive</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-ddl.html">Hive DDL</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/hive-config.html">Hive Configuration</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using in MapReduce</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class="current"><a href="/docs/mapred.html">Using in MapRed</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/mapreduce.html">Using in MapReduce</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Using ORC Core</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/core-java.html">Using Core Java</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Tools</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/tools.html">Tools</a></li>
+      
+
+
+</ul>
+
+    
+    <h4>Format Specification</h4>
+    
+
+<ul>
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-intro.html">Introduction</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/file-tail.html">File Tail</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/compression.html">Compression</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/run-length.html">Run Length Encoding</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/stripes.html">Stripes</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/encodings.html">Column Encodings</a></li>
+      
+
+
+  
+
+  
+    
+  
+
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+  
+    
+      <li class=""><a href="/docs/spec-index.html">Indexes</a></li>
+      
+
+
+</ul>
+
+    
+  </aside>
+</div>
+
+
+      <div class="clear"></div>
+
+    </div>
+  </section>
+
+
+  <footer role="contentinfo">
+  <p>The contents of this website are &copy;&nbsp;2016
+     <a href="https://www.apache.org/">Apache Software Foundation</a>
+     under the terms of the <a
+      href="https://www.apache.org/licenses/LICENSE-2.0.html">
+      Apache&nbsp;License&nbsp;v2</a>. Apache ORC and its logo are trademarks
+      of the Apache Software Foundation.</p>
+</footer>
+
+  <script>
+  var anchorForId = function (id) { // builds and returns a permalink anchor element pointing at "#" + id
+    var anchor = document.createElement("a");
+    anchor.className = "header-link";
+    anchor.href      = "#" + id;
+    anchor.innerHTML = "<span class=\"sr-only\">Permalink</span><i class=\"fa fa-link\"></i>"; // sr-only text label plus a link icon
+    anchor.title = "Permalink";
+    return anchor;
+  };
+
+  var linkifyAnchors = function (level, containingElement) { // appends a permalink to each heading of the given level inside containingElement
+    var headers = containingElement.getElementsByTagName("h" + level); // level is the heading number, e.g. 2 selects h2
+    for (var h = 0; h < headers.length; h++) {
+      var header = headers[h];
+
+      if (typeof header.id !== "undefined" && header.id !== "") { // headings without an id get no permalink
+        header.appendChild(anchorForId(header.id));
+      }
+    }
+  };
+
+  document.onreadystatechange = function () { // adds permalinks to all headings of the content block
+    if (this.readyState === "complete") { // act only when readyState has reached "complete"
+      var contentBlock = document.getElementsByClassName("docs")[0] || document.getElementsByClassName("news")[0]; // first "docs" element, else first "news" element
+      if (!contentBlock) {
+        return; // neither content block exists, so there is nothing to decorate
+      }
+      for (var level = 1; level <= 6; level++) { // heading levels h1 through h6
+        linkifyAnchors(level, contentBlock);
+      }
+    }
+  };
+</script>
+
+
+</body>
+</html>


Mime
View raw message