incubator-hcatalog-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1178252 [32/34] - in /incubator/hcatalog/site: author/src/documentation/content/xdocs/ publish/docs/ publish/docs/r0.2.0/ publish/docs/r0.2.0/api/ publish/docs/r0.2.0/api/org/ publish/docs/r0.2.0/api/org/apache/ publish/docs/r0.2.0/api/org...
Date Sun, 02 Oct 2011 21:05:30 GMT
Added: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-archt.jpg
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-archt.jpg?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-archt.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-box.jpg
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-box.jpg?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-box.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-product.jpg
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-product.jpg?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat-product.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat.jpg
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat.jpg?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/images/hcat.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/images/instruction_arrow.png
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/images/instruction_arrow.png?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/images/instruction_arrow.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/index.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/index.html?rev=1178252&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.2.0/index.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.2.0/index.html Sun Oct  2 21:05:22 2011
@@ -0,0 +1,275 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Overview </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.2.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menupage">
+<div class="menupagetitle">Overview</div>
+</div>
+<div class="menuitem">
+<a href="install.html">Source Installation</a>
+</div>
+<div class="menuitem">
+<a href="rpminstall.html">RPM Installation</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="index.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Overview </h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#HCatalog">HCatalog </a>
+</li>
+<li>
+<a href="#HCatalog+Architecture">HCatalog Architecture</a>
+<ul class="minitoc">
+<li>
+<a href="#Interfaces">Interfaces</a>
+</li>
+<li>
+<a href="#Data+Model">Data Model</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Data+Flow+Example">Data Flow Example</a>
+</li>
+</ul>
+</div>
+</div>
+   
+<a name="HCatalog"></a>
+<h2 class="h3">HCatalog </h2>
+<div class="section">
+<p>HCatalog is a table management and storage management layer for Hadoop that enables users with different data processing tools &ndash; Pig, MapReduce, Hive, Streaming &ndash; to more easily read and write data on the grid. HCatalog&rsquo;s table abstraction presents users with a relational view of data in the Hadoop distributed file system (HDFS) and ensures that users need not worry about where or in what format their data is stored &ndash; RCFile format, text files, sequence files. </p>
+<p>(Note: In this release, Streaming is not supported. Also, HCatalog supports only writing RCFile formatted files and only reading PigStorage formatted text files.)</p>
+<p></p>
+<div style="text-align: center;">
+<img class="figure" alt="HCatalog Product" src="images/hcat-product.jpg"></div>
+</div>
+      
+      
+      
+<a name="HCatalog+Architecture"></a>
+<h2 class="h3">HCatalog Architecture</h2>
+<div class="section">
+<p>HCatalog is built on top of the Hive metastore and incorporates components from the Hive DDL. HCatalog provides read and write interfaces for Pig and MapReduce and a command line interface for data definitions.</p>
+<p>(Note: HCatalog notification is not available in this release.)</p>
+<div style="text-align: center;">
+<img class="figure" alt="HCatalog Architecture" src="images/hcat-archt.jpg"></div>
+<p></p>
+<a name="Interfaces"></a>
+<h3 class="h4">Interfaces</h3>
+<p>The HCatalog interface for Pig &ndash; HCatLoader and HCatStorer &ndash; is an implementation of the Pig load and store interfaces. HCatLoader accepts a table to read data from; you can indicate which partitions to scan by immediately following the load statement with a partition filter statement. HCatStorer accepts a table to write to and a specification of partition keys to create a new partition. Currently HCatStorer only supports writing to one partition. HCatLoader and HCatStorer are implemented on top of HCatInputFormat and HCatOutputFormat respectively (see <a href="loadstore.html">HCatalog Load and Store</a>).</p>
+<p>The HCatalog interface for MapReduce &ndash; HCatInputFormat and HCatOutputFormat &ndash; is an implementation of Hadoop InputFormat and OutputFormat. HCatInputFormat accepts a table to read data from and a selection predicate to indicate which partitions to scan. HCatOutputFormat accepts a table to write to and a specification of partition keys to create a new partition. Currently HCatOutputFormat only supports writing to one partition (see <a href="inputoutput.html">HCatalog Input and Output</a>).</p>
+<p>
+<strong>Note:</strong> Currently there is no Hive-specific interface. Since HCatalog uses Hive's metastore, Hive can read data in HCatalog directly as long as a SerDe for that data already exists. In the future we plan to write a HCatalogSerDe so that users won't need storage-specific SerDes and so that Hive users can write data to HCatalog. Currently, this is supported - if a Hive user writes data in the RCFile format, it is possible to read the data through HCatalog. Also, see <a href="supportedformats.html">Supported data formats</a>.</p>
+<p>Data is defined using HCatalog's command line interface (CLI). The HCatalog CLI supports most of the DDL portion of Hive's query language, allowing users to create, alter, drop tables, etc. The CLI also supports the data exploration part of the Hive command line, such as SHOW TABLES, DESCRIBE TABLE, etc. (see the <a href="cli.html">HCatalog Command Line Interface</a>).</p>
+<a name="Data+Model"></a>
+<h3 class="h4">Data Model</h3>
+<p>HCatalog presents a relational view of data in HDFS. Data is stored in tables and these tables can be placed in databases. Tables can also be hash partitioned on one or more keys; that is, for a given value of a key (or set of keys) there will be one partition that contains all rows with that value (or set of values). For example, if a table is partitioned on date and there are three days of data in the table, there will be three partitions in the table. New partitions can be added to a table, and partitions can be dropped from a table. Partitioned tables have no partitions at create time. Unpartitioned tables effectively have one default partition that must be created at table creation time. There is no guaranteed read consistency when a partition is dropped.</p>
+<p>Partitions contain records. Once a partition is created records cannot be added to it, removed from it, or updated in it. (In the future some ability to integrate changes to a partition will be added.) Partitions are multi-dimensional and not hierarchical. Records are divided into columns. Columns have a name and a datatype. HCatalog supports the same datatypes as Hive (see <a href="loadstore.html">HCatalog Load and Store</a>). </p>
+</div>
+     
+  
+<a name="Data+Flow+Example"></a>
+<h2 class="h3">Data Flow Example</h2>
+<div class="section">
+<p>This simple data flow example shows how HCatalog is used to move data from the grid into a database. 
+  From the database, the data can then be analyzed using Hive.</p>
+<p>
+<strong>First</strong> Joe in data acquisition uses distcp to get data onto the grid.</p>
+<pre class="code">
+hadoop distcp file:///file.dat hdfs://data/rawevents/20100819/data
+
+hcat "alter table rawevents add partition 20100819 hdfs://data/rawevents/20100819/data"
+</pre>
+<p>
+<strong>Second</strong> Sally in data processing uses Pig to cleanse and prepare the data.</p>
+<p>Without HCatalog, Sally must be manually informed by Joe that data is available, or use Oozie and poll on HDFS.</p>
+<pre class="code">
+A = load '/data/rawevents/20100819/data' as (alpha:int, beta:chararray, &hellip;);
+B = filter A by bot_finder(zeta) = 0;
+&hellip;
+store Z into 'data/processedevents/20100819/data';
+</pre>
+<p>With HCatalog, Oozie will be notified by HCatalog that data is available and can then start the Pig job.</p>
+<pre class="code">
+A = load 'rawevents' using HCatLoader;
+B = filter A by date = '20100819' and by bot_finder(zeta) = 0;
+&hellip;
+store Z into 'processedevents' using HCatStorer("date=20100819");
+</pre>
+<p>
+<strong>Third</strong> Robert in client management uses Hive to analyze his clients' results.</p>
+<p>Without HCatalog, Robert must alter the table to add the required partition. </p>
+<pre class="code">
+alter table processedevents add partition 20100819 hdfs://data/processedevents/20100819/data
+
+select advertiser_id, count(clicks)
+from processedevents
+where date = '20100819' 
+group by advertiser_id;
+</pre>
+<p>With HCatalog, Robert does not need to modify the table structure.</p>
+<pre class="code">
+select advertiser_id, count(clicks)
+from processedevents
+where date = '20100819' 
+group by advertiser_id;
+</pre>
+</div>
+  
+  
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<div id="logos">
+<a href="http://forrest.apache.org/"><img border="0" title="Built with Apache Forrest" alt="Built with Apache Forrest - logo" src="images/built-with-forrest-button.png" style="width: 88px;height: 31px;"></a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.2.0/index.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/index.pdf?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/index.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/inputoutput.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/inputoutput.html?rev=1178252&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.2.0/inputoutput.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.2.0/inputoutput.html Sun Oct  2 21:05:22 2011
@@ -0,0 +1,321 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Input and Output Interfaces</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.2.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Source Installation</a>
+</div>
+<div class="menuitem">
+<a href="rpminstall.html">RPM Installation</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Input &amp; Output Interfaces </div>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="inputoutput.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Input and Output Interfaces</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Set+Up">Set Up</a>
+</li>
+<li>
+<a href="#HCatInputFormat">HCatInputFormat</a>
+<ul class="minitoc">
+<li>
+<a href="#API">API</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#HCatOutputFormat">HCatOutputFormat</a>
+<ul class="minitoc">
+<li>
+<a href="#API-N1006C">API</a>
+</li>
+<li>
+<a href="#Partition+Schema+Semantics">Partition Schema Semantics</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+
+ <!-- ==================================================================== --> 
+  
+<a name="Set+Up"></a>
+<h2 class="h3">Set Up</h2>
+<div class="section">
+<p>No HCatalog-specific setup is required for the HCatInputFormat and HCatOutputFormat interfaces.</p>
+<p></p>
+<p>
+<strong>Authentication</strong>
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+	
+<tr>
+	
+<td colspan="1" rowspan="1">
+<p>If a failure results in a message like "2010-11-03 16:17:28,225 WARN hive.metastore ... - Unable to connect metastore with URI thrift://..." in /tmp/&lt;username&gt;/hive.log, then make sure you have run "kinit &lt;username&gt;@FOO.COM" to get a kerberos ticket and to be able to authenticate to the HCatalog server. </p>
+</td>
+	
+</tr>
+
+</table>
+</div>
+
+<!-- ==================================================================== -->
+
+<a name="HCatInputFormat"></a>
+<h2 class="h3">HCatInputFormat</h2>
+<div class="section">
+<p>The HCatInputFormat is used with MapReduce jobs to read data from HCatalog managed tables.</p>
+<p>HCatInputFormat exposes a new Hadoop 20 MapReduce API for reading data as if it had been published to a table. If a MapReduce job uses this InputFormat to write output, the default InputFormat configured for the table is used as the underlying InputFormat and the new partition is published to the table after the job completes. Also, the maximum number of partitions that a job can work on is limited to 100K.</p>
+<a name="API"></a>
+<h3 class="h4">API</h3>
+<p>The API exposed by HCatInputFormat is shown below.</p>
+<p>To use HCatInputFormat to read data, first instantiate a <span class="codefrag">HCatTableInfo</span> with the necessary information from the table being read 
+	and then call setInput on the <span class="codefrag">HCatInputFormat</span>.</p>
+<p>You can use the <span class="codefrag">setOutputSchema</span> method to include a projection schema, to specify specific output fields. If a schema is not specified, this defaults to the table level schema.</p>
+<p>You can use the <span class="codefrag">getTableSchema</span> methods to determine the table schema for a specified input table.</p>
+<pre class="code">
+    /**
+     * Set the input to use for the Job. This queries the metadata server with
+     * the specified partition predicates, gets the matching partitions, puts
+     * the information in the conf object. The inputInfo object is updated with
+     * information needed in the client context
+     * @param job the job object
+     * @param inputInfo the table input info
+     * @throws IOException the exception in communicating with the metadata server
+     */
+    public static void setInput(Job job, HCatTableInfo inputInfo) throws IOException;
+
+    /**
+     * Set the schema for the HCatRecord data returned by HCatInputFormat.
+     * @param job the job object
+     * @param hcatSchema the schema to use as the consolidated schema
+     */
+    public static void setOutputSchema(Job job,HCatSchema hcatSchema) throws Exception;
+
+    /**
+     * Gets the HCatalog schema for the table specified in the HCatInputFormat.setInput call
+     * on the specified job context. This information is available only after HCatInputFormat.setInput
+     * has been called for a JobContext.
+     * @param context the context
+     * @return the table schema
+     * @throws Exception if HCatInputFormat.setInput has not been called for the current context
+     */
+    public static HCatSchema getTableSchema(JobContext context) throws Exception	
+</pre>
+</div>    
+ 
+ 
+<!-- ==================================================================== -->      
+
+<a name="HCatOutputFormat"></a>
+<h2 class="h3">HCatOutputFormat</h2>
+<div class="section">
+<p>HCatOutputFormat is used with MapReduce jobs to write data to HCatalog managed tables.</p>
+<p>HCatOutputFormat exposes a new Hadoop 20 MapReduce API for writing data to a table. If a MapReduce job uses this OutputFormat to write output, the default OutputFormat configured for the table is used as the underlying OutputFormat and the new partition is published to the table after the job completes. </p>
+<a name="API-N1006C"></a>
+<h3 class="h4">API</h3>
+<p>The API exposed by HCatOutputFormat is shown below.</p>
+<p>The first call on the HCatOutputFormat must be <span class="codefrag">setOutput</span>; any other call will throw an exception saying the output format is not initialized. The schema for the data being written out is specified by the <span class="codefrag">setSchema </span> method. If this is not called on the HCatOutputFormat, then by default it is assumed that the partition has the same schema as the current table level schema. </p>
+<pre class="code">
+/**
+     * Set the info about the output to write for the Job. This queries the metadata server
+     * to find the StorageDriver to use for the table.  Throws error if partition is already published.
+     * @param job the job object
+     * @param outputInfo the table output info
+     * @throws IOException the exception in communicating with the metadata server
+     */
+    public static void setOutput(Job job, HCatTableInfo outputInfo) throws IOException;
+
+    /**
+     * Set the schema for the data being written out to the partition. The
+     * table schema is used by default for the partition if this is not called.
+     * @param job the job object
+     * @param schema the schema for the data
+     * @throws IOException the exception
+     */
+    public static void setSchema(Job job, HCatSchema schema) throws IOException;
+
+    /**
+     * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
+     * on the specified job context.
+     * @param context the context
+     * @return the table schema
+     * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
+     */
+    public static HCatSchema getTableSchema(JobContext context) throws IOException
+</pre>
+<a name="Partition+Schema+Semantics"></a>
+<h3 class="h4">Partition Schema Semantics</h3>
+<p>The partition schema specified can be different from the current table level schema. The rules about what kinds of schema are allowed are:</p>
+<ul>
+	
+<li>If a column is present in both the table schema and the partition schema, the type for the column should match. 
+</li>
+	
+<li>If the partition schema has fewer columns than the table level schema, then only the columns at the end of the table schema are allowed to be absent. Columns in the middle cannot be absent. So if table schema is "c1,c2,c3", partition schema can be "c1" or "c1,c2" but not "c1,c3" or "c2,c3".</li>
+	
+<li>If the partition schema has extra columns, then the extra columns should appear after the table schema. So if table schema is "c1,c2", the partition schema can be "c1,c2,c3" but not "c1,c3,c4". The table schema is automatically updated to have the extra column. In the previous example, the table schema will become "c1,c2,c3" after the completion of the job. 
+</li>
+	
+<li>The partition keys are not allowed to be present in the schema being written out. 
+</li>
+	
+</ul>
+</div>
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.2.0/inputoutput.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/inputoutput.pdf?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/inputoutput.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/install.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/install.html?rev=1178252&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.2.0/install.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.2.0/install.html Sun Oct  2 21:05:22 2011
@@ -0,0 +1,448 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Installing HCatalog</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.2.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Source Installation</div>
+</div>
+<div class="menuitem">
+<a href="rpminstall.html">RPM Installation</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="install.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Installing HCatalog</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Server+Installation">Server Installation</a>
+</li>
+<li>
+<a href="#Starting+the+Server">Starting the Server</a>
+</li>
+<li>
+<a href="#Logging">Logging</a>
+</li>
+<li>
+<a href="#Stopping+the+Server">Stopping the Server</a>
+</li>
+<li>
+<a href="#Client+Install">Client Install</a>
+</li>
+</ul>
+</div>
+</div>
+
+  
+<a name="Server+Installation"></a>
+<h2 class="h3">Server Installation</h2>
+<div class="section">
+<p>
+<strong>Prerequisites</strong>
+</p>
+<ul>
+        
+<li>Machine on which the server can be installed - this should have
+        access to the hadoop cluster in question, and be accessible from
+        the machines you launch jobs from</li>
+        
+<li>MySQL db</li>
+        
+<li>Hadoop cluster</li>
+        
+<li>Unix user that the server will run as, and an associated kerberos
+        service principal and keytabs.</li>
+    
+</ul>
+<p>Throughout these instructions when you see a word in <em>italics</em> it
+    indicates a place where you should replace the word with a locally 
+    appropriate value such as a hostname or password.</p>
+<p>
+<strong>Database Setup</strong>
+</p>
+<p>Select a machine to install the database on.  This need not be the same
+    machine as the Thrift server, which we will set up later.  For large
+    clusters we recommend that they not be the same machine.  For the 
+    purposes of these instructions we will refer to this machine as
+    <em>hcatdb.acme.com</em>.
+</p>
+<p>Install MySQL server on <em>hcatdb.acme.com</em>.  You can obtain
+    packages for MySQL from <a href="http://www.mysql.com/downloads/">MySQL's
+    download site</a>.  We have developed and tested with versions 5.1.46
+    and 5.1.48.  We suggest you use these versions or later.
+    Once you have MySQL up and running, use the <span class="codefrag">mysql</span> command line
+    tool to add the <span class="codefrag">hive</span> user and <span class="codefrag">hivemetastoredb</span>
+    database.  You will need to pick a password for your <span class="codefrag">hive</span>
+    user, and replace <em>dbpassword</em> in the following commands with it.</p>
+<p>
+<span class="codefrag">mysql -u root</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; CREATE USER 'hive'@'</span><em>hcatdb.acme.com</em><span class="codefrag">' IDENTIFIED BY '</span><em>dbpassword</em><span class="codefrag">';</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; CREATE DATABASE hivemetastoredb DEFAULT CHARACTER SET latin1 DEFAULT COLLATE latin1_swedish_ci;</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; GRANT ALL PRIVILEGES ON hivemetastoredb.* TO 'hive'@'</span><em>hcatdb.acme.com</em><span class="codefrag">' WITH GRANT OPTION;</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; flush privileges;</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; quit;</span>
+</p>
+<p>In a temporary directory, untar the HCatalog artifact</p>
+<p>
+<span class="codefrag">tar xzf hcatalog-</span><em>version</em><span class="codefrag">.tar.gz</span>
+</p>
+<p>Use the database installation script found in the package to create the
+    database</p>
+<p>
+<span class="codefrag">mysql -u hive -D hivemetastoredb -h</span><em>hcatdb.acme.com</em><span class="codefrag"> -p &lt; share/hcatalog/hive/external/metastore/scripts/upgrade/mysql/hive-schema-0.7.0.mysql.sql</span>
+</p>
+<p>
+<strong>Thrift Server Setup</strong>
+</p>
+<p>Select a machine to install your Thrift server on.  For smaller and test
+    installations this can be the same machine as the database.  For the
+    purposes of these instructions we will refer to this machine as
+    <em>hcatsvr.acme.com</em>.</p>
+<p>Install the MySQL Java connector libraries on <em>hcatsvr.acme.com</em>.
+    You can obtain these from
+    <a href="http://www.mysql.com/downloads/connector/j/5.1.html">MySQL's
+    download site</a>.</p>
+<p>Select a user to run the Thrift server as.  This user should not be a
+    human user, and must be able to act as a proxy for other users.  We suggest
+    the name "hcat" for the user.  Throughout the rest of this documentation 
+    we will refer to this user as "hcat".  If necessary, add the user to 
+    <em>hcatsvr.acme.com</em>.</p>
+<p>Select a <em>root</em> directory for your installation of HCatalog.  This 
+    directory must be owned by the hcat user.  We recommend
+    <span class="codefrag">/usr/local/hcat</span>.  If necessary, create the directory.</p>
+<p>Download the HCatalog release into a temporary directory, and untar
+    it.  Then change directories into the new distribution and run the HCatalog
+    server installation script.  You will need to know the directory you chose
+    as <em>root</em> and the
+    directory you installed the MySQL Java connector libraries into (referred
+    to in the command below as <em>dbroot</em>).  You will also need your
+    <em>hadoop_home</em>, the directory where you have Hadoop installed, and 
+    the port number you wish HCatalog to operate on which you will use to set
+    <em>portnum</em>.</p>
+<p>
+<span class="codefrag">tar zxf hcatalog-</span><em>version</em><span class="codefrag">.tar.gz
+    cd hcatalog-</span><em>version</em>
+</p>
+<p>
+<span class="codefrag">share/hcatalog/scripts/hcat_server_install.sh -r </span><em>root</em><span class="codefrag"> -d </span><em>dbroot</em><span class="codefrag"> -h </span><em>hadoop_home</em><span class="codefrag"> -p </span><em>portnum</em>
+</p>
+<p>Now you need to edit your <em>root</em><span class="codefrag">/etc/hcatalog/hive-site.xml</span> file.
+    Open this file in your favorite text editor.  The following table shows the
+    values you need to configure.</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+            
+<th colspan="1" rowspan="1">Parameter</th>
+            <th colspan="1" rowspan="1">Value to Set it to</th>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">javax.jdo.option.ConnectionURL</td>
+            <td colspan="1" rowspan="1">In the JDBC connection string, change DBHOSTNAME to the name 
+            of the machine you put the MySQL server on.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">javax.jdo.option.ConnectionPassword</td>
+            <td colspan="1" rowspan="1"><em>dbpassword</em> value you used in setting up the MySQL server
+            above</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.warehouse.dir</td>
+            <td colspan="1" rowspan="1">The directory you want to use for the default database in your
+            installation</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.uris</td>
+            <td colspan="1" rowspan="1">You need to set the hostname to your Thrift
+            server.  Replace <em>SVRHOST</em> with the name of the
+            machine you are installing the Thrift server on.  You can also
+            change the port the Thrift server runs on by changing the default
+            value of 3306.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.sasl.enabled</td>
+            <td colspan="1" rowspan="1">Set to true by default.  Set to false if you do not wish to
+            secure the thrift interface.  This can be convenient for testing.
+            We do not recommend turning this off in production.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.kerberos.keytab.file</td>
+            <td colspan="1" rowspan="1">The path to the Kerberos keytab file containing the metastore
+            thrift server's service principal.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.kerberos.principal</td>
+            <td colspan="1" rowspan="1">The service principal for the metastore thrift server.  You can
+            reference your host as _HOST and it will be replaced with your
+            actual hostname</td>
+        
+</tr>
+    
+</table>
+<p>You can now proceed to starting the server.</p>
+</div>
+
+  
+<a name="Starting+the+Server"></a>
+<h2 class="h3">Starting the Server</h2>
+<div class="section">
+<p>Start the HCatalog server by switching directories to
+    <em>root</em> and invoking the start script
+    <span class="codefrag">share/hcatalog/scripts/hcat_server_start.sh</span>
+</p>
+</div>
+
+  
+<a name="Logging"></a>
+<h2 class="h3">Logging</h2>
+<div class="section">
+<p>Server activity logs and gc logs are located in
+    <em>root</em><span class="codefrag">/var/log/hcat_server</span>.  Logging configuration is located at
+    <em>root</em><span class="codefrag">/conf/log4j.properties</span>.  Server logging uses
+    <span class="codefrag">DailyRollingFileAppender</span> by default. It will generate a new
+    file per day and does not expire old log files automatically.</p>
+</div>
+
+  
+<a name="Stopping+the+Server"></a>
+<h2 class="h3">Stopping the Server</h2>
+<div class="section">
+<p>To stop the HCatalog server, change directories to the <em>root</em>
+    directory and invoke the stop script
+    <span class="codefrag">share/hcatalog/scripts/hcat_server_stop.sh</span>
+</p>
+</div>
+
+  
+<a name="Client+Install"></a>
+<h2 class="h3">Client Install</h2>
+<div class="section">
+<p>Select a <em>root</em> directory for your installation of HCatalog client.
+    We recommend <span class="codefrag">/usr/local/hcat</span>.  If necessary, create the directory.</p>
+<p>Download the HCatalog release into a temporary directory, and untar
+    it.</p>
+<p>
+<span class="codefrag">tar zxf hcatalog-</span><em>version</em><span class="codefrag">.tar.gz</span>
+</p>
+<p>Now you need to edit your <em>root</em><span class="codefrag">/etc/hcatalog/hive-site.xml</span> file.
+    Open this file in your favorite text editor.  The following table shows the
+    values you need to configure.   These values should match the values set on
+    the HCatalog server.  Do <strong>NOT</strong> copy the configuration file
+    from your server installation as that contains the password to your
+    database, which you should not distribute to your clients.</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+            
+<th colspan="1" rowspan="1">Parameter</th>
+            <th colspan="1" rowspan="1">Value to Set it to</th>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.warehouse.dir</td>
+            <td colspan="1" rowspan="1">The directory you want to use for the default database in your
+            installation</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.uris</td>
+            <td colspan="1" rowspan="1">You need to set the hostname of your Thrift
+            server by replacing <em>SVRHOST</em> with the name of the
+            machine you are installing the Thrift server on.  You can also
+            change the port the Thrift server runs on by changing the default
+            value of 3306.</td>
+        
+</tr>
+    
+</table>
+<p>The HCatalog command line interface (CLI) can now be invoked as
+    <em>root</em><span class="codefrag">/bin/hcat</span>.</p>
+</div>
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.2.0/install.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/install.pdf?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/install.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/linkmap.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/linkmap.html?rev=1178252&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.2.0/linkmap.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.2.0/linkmap.html Sun Oct  2 21:05:22 2011
@@ -0,0 +1,248 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Site Linkmap Table of Contents</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.2.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">HCatalog</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Source Installation</a>
+</div>
+<div class="menuitem">
+<a href="rpminstall.html">RPM Installation</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="linkmap.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Site Linkmap Table of Contents</h1>
+<div id="front-matter"></div>
+<p>
+          This is a map of the complete site and its structure.
+        </p>
+<ul>
+<li>
+<a>HCatalog</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>site</em>
+</li>
+<ul>
+
+  
+<ul>
+<li>
+<a>HCatalog</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>docs</em>
+</li>
+<ul> 
+    
+<ul>
+<li>
+<a href="index.html">Overview</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="install.html">Source Installation</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="rpminstall.html">RPM Installation</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="loadstore.html">Load &amp; Store Interfaces</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="cli.html">Command Line Interface </a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="supportedformats.html">Storage Formats</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="dynpartition.html">Dynamic Partitioning</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="notification.html">Notification</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>    
+
+    
+<ul>
+<li>
+<a href="api/index.html">API Docs</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>api</em>
+</li>
+</ul>
+  
+</ul>
+</ul>  
+
+
+</ul>
+</ul>
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.2.0/linkmap.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/linkmap.pdf?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/linkmap.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/loadstore.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/loadstore.html?rev=1178252&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.2.0/loadstore.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.2.0/loadstore.html Sun Oct  2 21:05:22 2011
@@ -0,0 +1,549 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Load and Store Interfaces</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.2.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Source Installation</a>
+</div>
+<div class="menuitem">
+<a href="rpminstall.html">RPM Installation</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Load &amp; Store Interfaces</div>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="loadstore.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Load and Store Interfaces</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Set+Up">Set Up</a>
+</li>
+<li>
+<a href="#HCatLoader">HCatLoader</a>
+<ul class="minitoc">
+<li>
+<a href="#Usage">Usage</a>
+</li>
+<li>
+<a href="#HCatalog+Data+Types">HCatalog Data Types</a>
+</li>
+<li>
+<a href="#Examples">Examples</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#HCatStorer">HCatStorer</a>
+<ul class="minitoc">
+<li>
+<a href="#Usage-N10129">Usage</a>
+</li>
+<li>
+<a href="#HCatalog+Data+Types-N10152">HCatalog Data Types</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+ 
+ <!-- ==================================================================== --> 
+  
+<a name="Set+Up"></a>
+<h2 class="h3">Set Up</h2>
+<div class="section">
+<p>The HCatLoader and HCatStorer interfaces are used with Pig scripts to read and write data in HCatalog managed tables. If you run your Pig script using the "pig" command (the bin/pig Perl script) no set up is required. </p>
+<pre class="code">
+$ pig mypig.script
+</pre>
+<p> If you run your Pig script using the "java" command (java -cp pig.jar...), then the hcat jar needs to be included in the classpath of the java command line (using the -cp option). Additionally, the following properties are required in the command line: </p>
+<ul>
+		
+<li>-Dhcat.metastore.uri=thrift://&lt;hcatalog server hostname&gt;:9080 </li>
+		
+<li>-Dhcat.metastore.principal=&lt;hcatalog server kerberos principal&gt; </li>
+	
+</ul>
+<pre class="code">
+$ java -cp pig.jar hcatalog.jar
+     -Dhcat.metastore.uri=thrift://&lt;hcatalog server hostname&gt;:9080 
+     -Dhcat.metastore.principal=&lt;hcatalog server kerberos principal&gt; myscript.pig
+</pre>
+<p></p>
+<p>
+<strong>Authentication</strong>
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+	
+<tr>
+	
+<td colspan="1" rowspan="1">
+<p>If a failure results in a message like "2010-11-03 16:17:28,225 WARN hive.metastore ... - Unable to connect metastore with URI thrift://..." in /tmp/&lt;username&gt;/hive.log, then make sure you have run "kinit &lt;username&gt;@FOO.COM" to get a kerberos ticket and to be able to authenticate to the HCatalog server. </p>
+</td>
+	
+</tr>
+
+</table>
+</div>
+  
+      
+<!-- ==================================================================== -->
+     
+<a name="HCatLoader"></a>
+<h2 class="h3">HCatLoader</h2>
+<div class="section">
+<p>HCatLoader is used with Pig scripts to read data from HCatalog managed tables.</p>
+<a name="Usage"></a>
+<h3 class="h4">Usage</h3>
+<p>HCatLoader is accessed via a Pig load statement.</p>
+<pre class="code">
+A = LOAD 'dbname.tablename' USING org.apache.hcatalog.pig.HCatLoader(); 
+</pre>
+<p>
+<strong>Assumptions</strong>
+</p>
+<p>You must specify the database name and table name using this format: 'dbname.tablename'. Both the database and table must be created prior to running your Pig script. The Hive metastore lets you create tables without specifying a database; if you created tables this way, then the database name is 'default' and the string becomes 'default.tablename'. </p>
+<p>If the table is partitioned, you can indicate which partitions to scan by immediately following the load statement with a partition filter statement 
+    (see <a href="#Examples">Examples</a>). </p>
+<a name="HCatalog+Data+Types"></a>
+<h3 class="h4">HCatalog Data Types</h3>
+<p>Restrictions apply to the types of columns HCatLoader can read.</p>
+<p>HCatLoader  can read <strong>only</strong> the data types listed in the table. 
+The table shows how Pig will interpret the HCatalog data type.</p>
+<p>(Note: HCatalog does not support type Boolean.)</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>
+<strong>HCatalog Data Type</strong>
+</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>
+<strong>Pig Data Type</strong>
+</p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>primitives (int, long, float, double, string) </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>int, long, float, double <br> string to chararray</p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>map (key type should be string, value type can be a primitive listed above)</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>map </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>List&lt;primitive&gt; or List&lt;map&gt; where map is of the type noted above </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>bag, with the primitive or map type as the field in each tuple of the bag </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>struct&lt;primitive fields&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>tuple </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>List&lt;struct&lt;primitive fields&gt;&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>bag, where each tuple in the bag maps to struct &lt;primitive fields&gt; </p>
+            
+</td>
+    
+</tr>
+ 
+</table>
+<a name="Examples"></a>
+<h3 class="h4">Examples</h3>
+<p>This load statement will load all partitions of the specified table.</p>
+<pre class="code">
+/* myscript.pig */
+A = LOAD 'dbname.tablename' USING org.apache.hcatalog.pig.HCatLoader(); 
+...
+...
+</pre>
+<p>If only some partitions of the specified table are needed, include a partition filter statement <strong>immediately</strong> following the load statement. 
+The filter statement can include conditions on partition as well as non-partition columns.</p>
+<pre class="code">
+/* myscript.pig */
+A = LOAD 'dbname.tablename' USING  org.apache.hcatalog.pig.HCatLoader();
+ 
+B = filter A by date == '20100819' and age &lt; 30; -- date is a partition column; age is not
+ 
+C = filter A by date == '20100819' and country == 'US'; -- date and country are partition columns
+...
+...
+</pre>
+<p>Certain combinations of conditions on partition and non-partition columns are not allowed in filter statements.
+For example, the following script results in this error message:  <br> 
+<br>
+
+<span class="codefrag">ERROR 1112: Unsupported query: You have an partition column (datestamp ) in a construction like: (pcond and ...) or ( pcond and ...) where pcond is a condition on a partition column.</span> 
+<br> 
+<br>
+A workaround is to restructure the filter condition by splitting it into multiple filter conditions, with the first condition immediately following the load statement.
+</p>
+<pre class="code">
+/* This script produces an ERROR */
+
+A = LOAD 'default.search_austria' USING org.apache.hcatalog.pig.HCatLoader();
+B = FILTER A BY
+    (   (datestamp &lt; '20091103' AND browser &lt; 50)
+     OR (action == 'click' and browser &gt; 100)
+    );
+...
+...
+</pre>
+</div> 
+	
+<!-- ==================================================================== -->	
+	
+<a name="HCatStorer"></a>
+<h2 class="h3">HCatStorer</h2>
+<div class="section">
+<p>HCatStorer is used with Pig scripts to write data to HCatalog managed tables.</p>
+<a name="Usage-N10129"></a>
+<h3 class="h4">Usage</h3>
+<p>HCatStorer is accessed via a Pig store statement.</p>
+<pre class="code">
+A = LOAD ...
+B = FOREACH A ...
+...
+...
+my_processed_data = ...
+
+STORE my_processed_data INTO 'dbname.tablename' 
+    USING org.apache.hcatalog.pig.HCatStorer('month=12,date=25,hour=0300','a:int,b:chararray,c:map[]');
+</pre>
+<p>
+<strong>Assumptions</strong>
+</p>
+<p>You must specify the database name and table name using this format: 'dbname.tablename'. Both the database and table must be created prior to running your Pig script. The Hive metastore lets you create tables without specifying a database; if you created tables this way, then the database name is 'default' and the string becomes 'default.tablename'. </p>
+<p>For the USING clause, you can have two string arguments: </p>
+<ul>
+
+<li>The first string argument represents key/value pairs for the partition. This is a mandatory argument. In the above example, month, date and hour are columns on which the table is partitioned. 
+The values for partition keys should NOT be quoted, even if the partition key is defined to be of string type. 
+</li>
+
+<li>The second string argument is the Pig schema for the data that will be written. This argument is optional, and if no schema is specified, a schema will be computed by Pig. If a schema is provided, it must match with the schema computed by Pig. (See also: <a href="inputoutput.html#Partition+Schema+Semantics">Partition Schema Semantics</a>.)</li>
+
+</ul>
+<p></p>
+<p></p>
+<a name="HCatalog+Data+Types-N10152"></a>
+<h3 class="h4">HCatalog Data Types</h3>
+<p>Restrictions apply to the types of columns HCatStorer can write.</p>
+<p>HCatStorer can write <strong>only</strong> the data types listed in the table. 
+The table shows how Pig will interpret the HCatalog data type.</p>
+<p>(Note: HCatalog does not support type Boolean.)</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>
+<strong>HCatalog Data Type</strong>
+</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>
+<strong>Pig Data Type</strong>
+</p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>primitives (int, long, float, double, string) </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>int, long, float, double, string <br>
+<br>
+               
+<strong>Note:</strong> HCatStorer does NOT support writing table columns of type smallint or tinyint. 
+               To be able to write from Pig using the HCatalog storer, table columns must be of type int or bigint.
+               </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>map (key type should be string, value type can be a primitive listed above)</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>map </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>List&lt;primitive&gt; or List&lt;map&gt; where map is of the type noted above </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>bag, with the primitive or map type as the field in each tuple of the bag </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>struct&lt;primitive fields&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>tuple </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>List&lt;struct&lt;primitive fields&gt;&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>bag, where each tuple in the bag maps to struct &lt;primitive fields&gt; </p>
+            
+</td>
+    
+</tr>
+ 
+</table>
+</div>
+	
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.2.0/loadstore.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/loadstore.pdf?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/loadstore.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.2.0/locationmap.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/locationmap.xml?rev=1178252&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.2.0/locationmap.xml (added)
+++ incubator/hcatalog/site/publish/docs/r0.2.0/locationmap.xml Sun Oct  2 21:05:22 2011
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<locationmap xmlns="http://apache.org/forrest/locationmap/1.0">
+  <components>
+    <matchers default="lm">
+      <matcher name="lm" src="org.apache.forrest.locationmap.WildcardLocationMapHintMatcher"/>
+    </matchers>
+<!--
+      * Can contain any sitemap selector with the following syntax. * 
+    <selectors default="exists">
+      <selector name="exists" logger="sitemap.selector.exists"  
+          src="org.apache.forrest.sourceexists.SourceExistsSelector" />
+    </selectors>
+    -->
+  </components>
+<!--
+    * Can contain a mount statement as a sibling to components and locator *
+    <mount src="somelocation.xml"/>
+  -->
+  <locator>
+<!--
+      * Can contain a mount within a selector where a selector is valid. 
+    <select>
+      <mount src="somelocation.xml"/>
+    </select>
+    -->
+    <match pattern="project.rewrite.**">
+      <location src="http://cocoon.apache.org/{1}.html"/>
+    </match>
+    <match pattern="project.remote.**.xml">
+      <location src="http://svn.apache.org/repos/asf/forrest/trunk/main/fresh-site/src/documentation/content/xdocs/{1}.xml"/>
+    </match>
+<!-- 
+      * Can use a selector inside a match.  *
+    <match pattern="somepattern/**">
+      <select>
+        <location src="first-location-attempted"/>
+        <location src="second-location-attempted"/>
+        <location src="third-location-attempted"/>
+      </select>
+    </match>
+    -->
+<!--
+     To locate all your source documents in a slide repository you can do:
+
+    <match pattern="tabs.xml">
+      <location src="http://127.0.0.1:8080/slide/files/tabs.xml"/>
+    </match>
+    <match pattern="site.xml">
+      <location src="http://127.0.0.1:8080/slide/files/site.xml"/>
+    </match>
+    <match pattern="**.xml">
+      <location src="http://127.0.0.1:8080/slide/files/{1}.xml"/>
+    </match>
+    -->
+  </locator>
+</locationmap>

Added: incubator/hcatalog/site/publish/docs/r0.2.0/notification.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/notification.html?rev=1178252&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.2.0/notification.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.2.0/notification.html Sun Oct  2 21:05:22 2011
@@ -0,0 +1,319 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Notification</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.2.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Source Installation</a>
+</div>
+<div class="menuitem">
+<a href="rpminstall.html">RPM Installation</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Notification</div>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="notification.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Notification</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Notification+for+a+New+Partition">Notification for a New Partition</a>
+</li>
+<li>
+<a href="#Notification+for+a+Set+of+Partitions">Notification for a Set of Partitions</a>
+</li>
+</ul>
+</div>
+</div>
+  
+ 
+<p> In HCatalog 0.2 we introduce notifications for certain events happening in the system. This way applications such as Oozie can wait for those events and schedule the work that depends on them. The current version of HCatalog supports two kinds of events: </p>
+
+<ul>
+
+<li>Notification when a new partition is added</li>
+
+<li>Notification when a set of partitions is added</li>
+
+</ul>
+
+
+<p>No additional work is required to send a notification when a new partition is added: the existing addPartition call will send the notification message. This means that your existing code, when running with 0.2, will automatically send the notifications. </p>
+
+
+<a name="Notification+for+a+New+Partition"></a>
+<h2 class="h3">Notification for a New Partition</h2>
+<div class="section">
+<p>To receive notification that a new partition has been added, you need to follow these three steps.</p>
+<p>1. To start receiving messages, create a connection to a message bus as shown here:</p>
+<pre class="code">
+ConnectionFactory connFac = new ActiveMQConnectionFactory(amqurl);
+Connection conn = connFac.createConnection();
+conn.start();
+ </pre>
+<p>2. Subscribe to a topic you are interested in. When subscribing on a message bus, you need to subscribe to a particular topic to receive the messages that are being delivered on that topic. </p>
+<ul>
+  
+<li>  
+  
+<p>The topic name corresponding to a particular table is stored in table properties and can be retrieved using the following piece of code: </p>
+ 
+<pre class="code">
+HiveMetaStoreClient msc = new HiveMetaStoreClient(hiveConf);
+String topicName = msc.getTable("mydb", "myTbl").getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME);
+ </pre>
+ 
+</li>
+  
+  
+<li>  
+  
+<p>Use the topic name to subscribe to a topic as follows: </p>
+ 
+<pre class="code">
+Session session = conn.createSession(true, Session.SESSION_TRANSACTED);
+Destination hcatTopic = session.createTopic(topicName);
+MessageConsumer consumer = session.createConsumer(hcatTopic);
+consumer.setMessageListener(this);
+ </pre>
+ 
+</li>
+  
+</ul>
+<p>3. To start receiving messages you need to implement the JMS interface <span class="codefrag">MessageListener</span>, which, in turn, will make you implement the method <span class="codefrag">onMessage(Message msg)</span>. This method will be called whenever a new message arrives on the message bus. The message contains a partition object representing the corresponding partition, which can be retrieved as shown here: </p>
+<pre class="code">
+@Override
+   public void onMessage(Message msg) {
+      // We are interested in only add_partition events on this table.
+      // So, check message type first.
+      if(msg.getStringProperty(HCatConstants.HCAT_EVENT).equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)){
+          Object obj = (((ObjectMessage)msg).getObject());
+      }
+   }
+ </pre>
+<p>You need to have a JMS jar in your classpath to make this work. Additionally, you need to have a JMS provider&rsquo;s jar in your classpath. HCatalog uses ActiveMQ as a JMS provider. In principle, any JMS provider can be used on the client side; however, ActiveMQ is recommended. ActiveMQ can be obtained from: http://activemq.apache.org/activemq-550-release.html </p>
+</div>
+
+
+<a name="Notification+for+a+Set+of+Partitions"></a>
+<h2 class="h3">Notification for a Set of Partitions</h2>
+<div class="section">
+<p>The example code below illustrates how to send a notification when a set of partitions has been added.</p>
+<pre class="code">
+HiveMetaStoreClient msc = new HiveMetaStoreClient(conf);
+
+// Create a map, specifying partition key names and values
+Map&lt;String,String&gt; partMap = new HashMap&lt;String, String&gt;();
+partMap.put("date","20110711");
+partMap.put("country","*");
+
+// Mark the partition as "done"
+msc.markPartitionForEvent("mydb", "mytbl", partMap, PartitionEventType.LOAD_DONE);
+</pre>
+<p>To receive this notification, the consumer needs to do the following:</p>
+<ol>
+
+<li>Repeat steps one and two from above to establish the connection to the notification system and to subscribe to the topic.</li>
+
+<li>Receive the notification as shown in this example:
+<pre class="code">
+HiveMetaStoreClient msc = new HiveMetaStoreClient(conf);
+
+// Create a map, specifying partition key names and values
+Map&lt;String,String&gt; partMap = new HashMap&lt;String, String&gt;();
+partMap.put("date","20110711");
+partMap.put("country","*");
+
+// Mark the partition as "done"
+msc.markPartitionForEvent("mydb", "mytbl", partMap, PartitionEventType.LOAD_DONE);
+</pre>
+
+</li>
+
+</ol>
+<p>If the consumer has registered with the message bus and is currently live, it will get the callback from the message bus once the producer marks the partition as "done".  Alternatively, the consumer can ask explicitly for a particular partition from the metastore. The following code illustrates the usage from a consumer's perspective:</p>
+<pre class="code">
+// Ask the metastore whether a particular partition has been marked or not.
+boolean marked = msc.isPartitionMarkedForEvent("mydb", "mytbl", partMap, PartitionEventType.LOAD_DONE);
+
+// Or register with a message bus and get an asynchronous callback.
+ConnectionFactory connFac = new ActiveMQConnectionFactory(amqurl);
+Connection conn = connFac.createConnection();
+conn.start();
+Session session = conn.createSession(true, Session.SESSION_TRANSACTED);
+Destination hcatTopic = session.createTopic(topic);
+MessageConsumer consumer = session.createConsumer(hcatTopic);
+consumer.setMessageListener(this);
+
+
+public void onMessage(Message msg) {
+
+                                
+  MapMessage mapMsg = (MapMessage)msg;
+  Enumeration&lt;String&gt; keys = mapMsg.getMapNames();
+  
+  // Enumerate over all keys. This will print key value pairs specifying the particular partition 
+  // which was marked done. In this case, it will print:
+  // date : 20110711
+  // country: *
+
+  while(keys.hasMoreElements()){
+    String key = keys.nextElement();
+    System.out.println(key + " : " + mapMsg.getString(key));
+  }
+  System.out.println("Message: "+msg);
+</pre>
+<p>Notification is enabled by default. To disable notification, you need to leave <span class="codefrag">hive.metastore.event.listeners</span> blank or remove it from <span class="codefrag">hive-site.xml</span>.
+</p>
+</div>
+    
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.2.0/notification.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.2.0/notification.pdf?rev=1178252&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.2.0/notification.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



Mime
View raw message