incubator-hcatalog-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ga...@apache.org
Subject svn commit: r1339782 [39/41] - in /incubator/hcatalog/site: author/src/documentation/content/xdocs/ publish/ publish/docs/r0.4.0/ publish/docs/r0.4.0/api/ publish/docs/r0.4.0/api/org/ publish/docs/r0.4.0/api/org/apache/ publish/docs/r0.4.0/api/org/apac...
Date Thu, 17 May 2012 18:36:06 GMT
Added: incubator/hcatalog/site/publish/docs/r0.4.0/inputoutput.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/inputoutput.html?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/inputoutput.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/inputoutput.html Thu May 17 18:35:47 2012
@@ -0,0 +1,511 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Input and Output Interfaces</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.4.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Installation From Tarball</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Input &amp; Output Interfaces </div>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="inputoutput.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Input and Output Interfaces</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Set+Up">Set Up</a>
+</li>
+<li>
+<a href="#HCatInputFormat">HCatInputFormat</a>
+<ul class="minitoc">
+<li>
+<a href="#API">API</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#HCatOutputFormat">HCatOutputFormat</a>
+<ul class="minitoc">
+<li>
+<a href="#API-N1005C">API</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Examples">Examples</a>
+</li>
+</ul>
+</div>
+</div>
+
+ <!-- ==================================================================== --> 
+  
+<a name="Set+Up"></a>
+<h2 class="h3">Set Up</h2>
+<div class="section">
+<p>No HCatalog-specific setup is required for the HCatInputFormat and HCatOutputFormat interfaces.</p>
+<p></p>
+</div>
+
+<!-- ==================================================================== -->
+
+<a name="HCatInputFormat"></a>
+<h2 class="h3">HCatInputFormat</h2>
+<div class="section">
+<p>The HCatInputFormat is used with MapReduce jobs to read data from HCatalog managed tables.</p>
+<p>HCatInputFormat exposes a Hadoop 0.20 MapReduce API for reading data as if it had been published to a table.</p>
+<a name="API"></a>
+<h3 class="h4">API</h3>
+<p>The API exposed by HCatInputFormat is shown below.</p>
+<p>To use HCatInputFormat to read data, first instantiate an <span class="codefrag">InputJobInfo</span> with the necessary information from the table being read 
+	and then call setInput with the <span class="codefrag">InputJobInfo</span>.</p>
+<p>You can use the <span class="codefrag">setOutputSchema</span> method to include a projection schema, to
+specify specific output fields. If a schema is not specified all the columns in the table
+will be returned.</p>
+<p>You can use the <span class="codefrag">getTableSchema</span> method to determine the table schema for a specified input table.</p>
+<pre class="code">
+  /**
+   * Set the input to use for the Job. This queries the metadata server with
+   * the specified partition predicates, gets the matching partitions, puts
+   * the information in the conf object. The inputInfo object is updated with
+   * information needed in the client context
+   * @param job the job object
+   * @param inputJobInfo the input info for table to read
+   * @throws IOException the exception in communicating with the metadata server
+   */
+  public static void setInput(Job job,
+      InputJobInfo inputJobInfo) throws IOException;
+
+  /**
+   * Set the schema for the HCatRecord data returned by HCatInputFormat.
+   * @param job the job object
+   * @param hcatSchema the schema to use as the consolidated schema
+   */
+  public static void setOutputSchema(Job job,HCatSchema hcatSchema) 
+    throws IOException;
+
+  /**
+   * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call
+   * on the specified job context. This information is available only after HCatInputFormat.setInput
+   * has been called for a JobContext.
+   * @param context the context
+   * @return the table schema
+   * @throws IOException if HCatInputFormat.setInput has not been called 
+   *                     for the current context
+   */
+  public static HCatSchema getTableSchema(JobContext context) 
+    throws IOException;	
+
+</pre>
+</div>    
+ 
+ 
+<!-- ==================================================================== -->      
+
+<a name="HCatOutputFormat"></a>
+<h2 class="h3">HCatOutputFormat</h2>
+<div class="section">
+<p>HCatOutputFormat is used with MapReduce jobs to write data to HCatalog managed tables.</p>
+<p>HCatOutputFormat exposes a Hadoop 0.20 MapReduce API for writing data to a table.
+    When a MapReduce job uses HCatOutputFormat to write output, the default OutputFormat configured for the table is used and the new partition is published to the table after the job completes. </p>
+<a name="API-N1005C"></a>
+<h3 class="h4">API</h3>
+<p>The API exposed by HCatOutputFormat is shown below.</p>
+<p>The first call on the HCatOutputFormat must be <span class="codefrag">setOutput</span>; any other call will throw an exception saying the output format is not initialized. The schema for the data being written out is specified by the <span class="codefrag">setSchema</span> method. You must call this method, providing the schema of the data you are writing. If your data has the same schema as the table schema, you can use HCatOutputFormat.getTableSchema() to get the table schema and then pass that along to setSchema(). </p>
+<pre class="code">
+    /**
+     * Set the info about the output to write for the Job. This queries the metadata server
+     * to find the StorageDriver to use for the table.  Throws error if partition is already published.
+     * @param job the job object
+     * @param outputJobInfo the table output info
+     * @throws IOException the exception in communicating with the metadata server
+     */
+    @SuppressWarnings("unchecked")
+    public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException;
+
+    /**
+     * Set the schema for the data being written out to the partition. The
+     * table schema is used by default for the partition if this is not called.
+     * @param job the job object
+     * @param schema the schema for the data
+     */
+    public static void setSchema(final Job job, final HCatSchema schema) throws IOException;
+
+  /**
+   * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
+   * on the specified job context.
+   * @param context the context
+   * @return the table schema
+   * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
+   */
+  public static HCatSchema getTableSchema(JobContext context) throws IOException;
+
+</pre>
+</div>
+
+
+<a name="Examples"></a>
+<h2 class="h3">Examples</h2>
+<div class="section">
+<p>
+<strong>Running MapReduce with HCatalog</strong>
+</p>
+<p>
+Your MapReduce program will need to know where the thrift server to connect to is.  The
+easiest way to do this is pass it as an argument to your Java program. You will need to
+pass the Hive and HCatalog jars to MapReduce as well, via the -libjars argument.</p>
+<pre class="code">
+export HADOOP_HOME=&lt;path_to_hadoop_install&gt;
+export HCAT_HOME=&lt;path_to_hcat_install&gt;
+export LIB_JARS=$HCAT_HOME/share/hcatalog/hcatalog-0.4.0.jar,
+$HIVE_HOME/lib/hive-metastore-0.9.0.jar,
+$HIVE_HOME/lib/libthrift-0.7.0.jar,
+$HIVE_HOME/lib/hive-exec-0.9.0.jar,
+$HIVE_HOME/lib/libfb303-0.7.0.jar,
+$HIVE_HOME/lib/jdo2-api-2.3-ec.jar,
+$HIVE_HOME/lib/slf4j-api-1.6.1.jar
+
+export HADOOP_CLASSPATH=$HCAT_HOME/share/hcatalog/hcatalog-0.4.0.jar:
+$HIVE_HOME/lib/hive-metastore-0.9.0.jar:
+$HIVE_HOME/lib/libthrift-0.7.0.jar:
+$HIVE_HOME/lib/hive-exec-0.9.0.jar:
+$HIVE_HOME/lib/libfb303-0.7.0.jar:
+$HIVE_HOME/lib/jdo2-api-2.3-ec.jar:
+$HIVE_HOME/conf:$HADOOP_HOME/conf:
+$HIVE_HOME/lib/slf4j-api-1.6.1.jar
+
+$HADOOP_HOME/bin/hadoop --config $HADOOP_HOME/conf jar &lt;path_to_jar&gt;
+&lt;main_class&gt; -libjars $LIB_JARS &lt;program_arguments&gt;
+</pre>
+<p>
+<strong>Authentication</strong>
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+	
+<tr>
+	
+<td colspan="1" rowspan="1">
+<p>If a failure results in a message like "2010-11-03 16:17:28,225 WARN hive.metastore ... - Unable to connect metastore with URI thrift://..." in /tmp/&lt;username&gt;/hive.log, then make sure you have run "kinit &lt;username&gt;@FOO.COM" to get a Kerberos ticket and to be able to authenticate to the HCatalog server. </p>
+</td>
+	
+</tr>
+
+</table>
+<p>
+<strong>Read Example</strong>
+</p>
+<p>
+The following very simple MapReduce program reads data from one table which it assumes to have an integer in the
+second column, and counts how many different values it sees.   That is, it does the
+equivalent of <span class="codefrag">select col1, count(*) from $table group by col1;</span>.
+</p>
+<pre class="code">
+public class GroupByAge extends Configured implements Tool {
+
+    public static class Map extends
+            Mapper&lt;WritableComparable, HCatRecord, IntWritable, IntWritable&gt; {
+
+        int age;
+
+        @Override
+        protected void map(
+                WritableComparable key,
+                HCatRecord value,
+                org.apache.hadoop.mapreduce.Mapper&lt;WritableComparable, HCatRecord, 
+                        IntWritable, IntWritable&gt;.Context context)
+                throws IOException, InterruptedException {
+            age = (Integer) value.get(1);
+            context.write(new IntWritable(age), new IntWritable(1));
+        }
+    }
+
+    public static class Reduce extends Reducer&lt;IntWritable, IntWritable,
+    WritableComparable, HCatRecord&gt; {
+
+
+      @Override 
+      protected void reduce(
+              IntWritable key,
+              java.lang.Iterable&lt;IntWritable&gt; values, 
+              org.apache.hadoop.mapreduce.Reducer&lt;IntWritable, IntWritable,
+                      WritableComparable, HCatRecord&gt;.Context context)
+              throws IOException, InterruptedException {
+          int sum = 0;
+          Iterator&lt;IntWritable&gt; iter = values.iterator();
+          while (iter.hasNext()) {
+              sum++;
+              iter.next();
+          }
+          HCatRecord record = new DefaultHCatRecord(2);
+          record.set(0, key.get());
+          record.set(1, sum);
+
+          context.write(null, record);
+        }
+    }
+
+    public int run(String[] args) throws Exception {
+        Configuration conf = getConf();
+        args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+        String inputTableName = args[0];
+        String outputTableName = args[1];
+        String dbName = null;
+
+        Job job = new Job(conf, "GroupByAge");
+        HCatInputFormat.setInput(job, InputJobInfo.create(dbName,
+                inputTableName, null));
+        // initialize HCatOutputFormat
+
+        job.setInputFormatClass(HCatInputFormat.class);
+        job.setJarByClass(GroupByAge.class);
+        job.setMapperClass(Map.class);
+        job.setReducerClass(Reduce.class);
+        job.setMapOutputKeyClass(IntWritable.class);
+        job.setMapOutputValueClass(IntWritable.class);
+        job.setOutputKeyClass(WritableComparable.class);
+        job.setOutputValueClass(DefaultHCatRecord.class);
+        HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
+                outputTableName, null));
+        HCatSchema s = HCatOutputFormat.getTableSchema(job);
+        System.err.println("INFO: output schema explicitly set for writing:"
+                + s);
+        HCatOutputFormat.setSchema(job, s);
+        job.setOutputFormatClass(HCatOutputFormat.class);
+        return (job.waitForCompletion(true) ? 0 : 1);
+    }
+
+    public static void main(String[] args) throws Exception {
+        int exitCode = ToolRunner.run(new GroupByAge(), args);
+        System.exit(exitCode);
+    }
+}
+</pre>
+<p>Notice a number of important points about this program:</p>
+<ol>
+
+<li>The implementation of Map takes HCatRecord as an input and the implementation of Reduce produces it as an output.</li>
+
+<li>This example program assumes the schema of the input, but it could also retrieve the schema via
+HCatInputFormat.getTableSchema() and retrieve fields based on the results of that call.</li>
+
+<li>The input descriptor for the table to be read is created by calling InputJobInfo.create.  It requires the database name,
+table name, and partition filter.  In this example the partition filter is null, so all partitions of the table
+will be read.</li>
+
+<li>The output descriptor for the table to be written is created by calling OutputJobInfo.create.  It requires the
+database name, the table name, and a Map of partition keys and values that describe the partition being written.
+In this example it is assumed the table is unpartitioned, so this Map is null.</li>
+
+</ol>
+<p>To scan just selected partitions of a table, a filter describing the desired partitions can be passed to
+InputJobInfo.create.  To scan a single partition, the filter string should look like: "datestamp=20120401" where
+datestamp is the partition column name and 20120401 is the value you want to read.</p>
+<p>
+<strong>Filter Operators</strong>
+</p>
+<p>A filter can contain the operators 'and', 'or', 'like', '()', '=', '&lt;&gt;' (not equal), '&lt;', '&gt;', '&lt;='
+and '&gt;='.  For example: </p>
+<ul>
+
+<li>
+<span class="codefrag">datestamp &gt; "20110924"</span>
+</li>
+
+<li>
+<span class="codefrag">datestamp &lt; "20110925"</span>
+</li>
+
+<li>
+<span class="codefrag">datestamp &lt;= "20110925" and datestamp &gt;= "20110924"</span>
+</li>
+
+</ul>
+<p>
+<strong>Scan Filter</strong>
+</p>
+<p>Assume for example you have a web_logs table that is partitioned by the column datestamp.  You could select one partition of the table by changing</p>
+<pre class="code">
+HCatInputFormat.setInput(job, InputJobInfo.create(dbName, inputTableName, null));
+</pre>
+<p>
+to
+</p>
+<pre class="code">
+HCatInputFormat.setInput(job,
+    InputJobInfo.create(dbName, inputTableName, "datestamp=\"20110924\""));
+  </pre>
+<p>
+This filter must reference only partition columns.  Values from other columns will cause the job to fail.</p>
+<p>
+<strong>Write Filter</strong>
+</p>
+<p>
+To write to a single partition you can change the above example to have a Map of key value pairs that describe all
+of the partition keys and values for that partition.  In our example web_logs table, there is only one partition
+column (datestamp), so our Map will have only one entry.  Change </p>
+<pre class="code">
+HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
+</pre>
+<p>to </p>
+<pre class="code">
+Map partitions = new HashMap&lt;String, String&gt;(1);
+partitions.put("datestamp", "20110924");
+HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, partitions));
+</pre>
+<p>To write multiple partitions simultaneously you can leave the Map null, but all of the partitioning columns must be present in the data you are writing.
+</p>
+</div>
+
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011-2012 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.4.0/inputoutput.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/inputoutput.pdf?rev=1339782&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.4.0/inputoutput.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.4.0/install.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/install.html?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/install.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/install.html Thu May 17 18:35:47 2012
@@ -0,0 +1,527 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Source Installation</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.4.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Installation From Tarball</div>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="install.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Source Installation</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Server+Installation+from+Source">Server Installation from Source</a>
+</li>
+<li>
+<a href="#Starting+the+Server">Starting the Server</a>
+</li>
+<li>
+<a href="#Logging">Logging</a>
+</li>
+<li>
+<a href="#Stopping+the+Server">Stopping the Server</a>
+</li>
+<li>
+<a href="#Client+Installation">Client Installation</a>
+</li>
+</ul>
+</div>
+</div>
+
+  
+<a name="Server+Installation+from+Source"></a>
+<h2 class="h3">Server Installation from Source</h2>
+<div class="section">
+<p>
+<strong>Prerequisites</strong>
+</p>
+<ul>
+        
+<li>Machine to build the installation tar on</li>
+        
+<li>Machine on which the server can be installed - this should have
+        access to the Hadoop cluster in question, and be accessible from
+        the machines you launch jobs from</li>
+        
+<li>an RDBMS - we recommend MySQL and provide instructions for it</li>
+        
+<li>Hadoop cluster</li>
+        
+<li>Unix user that the server will run as, and, if you are running your
+        cluster in secure mode, an associated Kerberos service principal and keytabs.</li>
+    
+</ul>
+<p>Throughout these instructions when you see a word in <em>italics</em> it
+    indicates a place where you should replace the word with a locally 
+    appropriate value such as a hostname or password.</p>
+<p>
+<strong>Building a tarball </strong>
+</p>
+<p>If you downloaded HCatalog from Apache or another site as a source release,
+    you will need to first build a tarball to install.  You can tell if you have
+    a source release by looking at the name of the object you downloaded.  If
+    it is named hcatalog-src-0.4.0-incubating.tar.gz (notice the
+    <strong>src</strong> in the name) then you have a source release.</p>
+<p>If you do not already have Apache Ant installed on your machine, you 
+    will need to obtain it.  You can get it from the <a href="http://ant.apache.org/">
+    Apache Ant website</a>.  Once you download it, you will need to unpack it
+    somewhere on your machine.  The directory where you unpack it will be referred
+    to as <em>ant_home</em> in this document.</p>
+<p>If you do not already have Apache Forrest installed on your machine, you 
+    will need to obtain it.  You can get it from the <a href="http://forrest.apache.org/">
+    Apache Forrest website</a>.  Once you download it, you will need to unpack 
+    it somewhere on your machine.  The directory where you unpack it will be referred
+    to as <em>forrest_home</em> in this document.</p>
+<p>To produce a tarball from this do the following:</p>
+<p>Create a directory to expand the source release in.  Copy the source
+    release to that directory and unpack it.</p>
+<p>
+<span class="codefrag">mkdir /tmp/hcat_source_release</span>
+</p>
+<p>
+<span class="codefrag">cp hcatalog-src-0.4.0-incubating.tar.gz /tmp/hcat_source_release</span>
+</p>
+<p>
+<span class="codefrag">cd /tmp/hcat_source_release</span>
+</p>
+<p>
+<span class="codefrag">tar xzf hcatalog-src-0.4.0-incubating.tar.gz</span>
+</p>
+<p>Change directories into the unpacked source release and build the
+    installation tarball.</p>
+<p>
+<span class="codefrag">cd hcatalog-src-0.4.0-incubating</span>
+</p>
+<p>
+<em>ant_home</em><span class="codefrag">/bin/ant -Dhcatalog.version=0.4.0
+    -Dforrest.home=</span><em>forrest_home</em><span class="codefrag"> tar </span>
+</p>
+<p>The tarball for installation should now be at
+    <span class="codefrag">build/hcatalog-0.4.0.tar.gz</span>
+</p>
+<p>
+<strong>Database Setup</strong>
+</p>
+<p>If you do not already have Hive installed with MySQL, the following will
+    walk you through how to do so.  If you have already set this up, you can skip
+    this step.</p>
+<p>Select a machine to install the database on.  This need not be the same
+    machine as the Thrift server, which we will set up later.  For large
+    clusters we recommend that they not be the same machine.  For the 
+    purposes of these instructions we will refer to this machine as
+    <em>hivedb.acme.com</em>
+</p>
+<p>Install MySQL server on <em>hivedb.acme.com</em>.  You can obtain
+    packages for MySQL from <a href="http://www.mysql.com/downloads/">MySQL's
+    download site</a>.  We have developed and tested with versions 5.1.46
+    and 5.1.48.  We suggest you use these versions or later.
+    Once you have MySQL up and running, use the <span class="codefrag">mysql</span> command line
+    tool to add the <span class="codefrag">hive</span> user and <span class="codefrag">hivemetastoredb</span>
+    database.  You will need to pick a password for your <span class="codefrag">hive</span>
+    user, and replace <em>dbpassword</em> in the following commands with it.</p>
+<p>
+<span class="codefrag">mysql -u root</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; CREATE USER 'hive'@'</span><em>hivedb.acme.com</em><span class="codefrag">' IDENTIFIED BY '</span><em>dbpassword</em><span class="codefrag">';</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; CREATE DATABASE hivemetastoredb DEFAULT CHARACTER SET latin1 DEFAULT COLLATE latin1_swedish_ci;</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; GRANT ALL PRIVILEGES ON hivemetastoredb.* TO 'hive'@'</span><em>hivedb.acme.com</em><span class="codefrag">' WITH GRANT OPTION;</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; flush privileges;</span>
+</p>
+<p>
+<span class="codefrag">mysql&gt; quit;</span>
+</p>
+<p>Use the database installation script found in the Hive package to create the
+    database.  <span class="codefrag">hive_home</span> in the line below refers to the directory
+    where you have installed Hive.  If you are using Hive rpms, then this will
+    be <span class="codefrag">/usr/lib/hive</span>.</p>
+<p>
+<span class="codefrag">mysql -u hive -D hivemetastoredb -h</span><em>hivedb.acme.com</em><span class="codefrag"> -p &lt; </span><em>hive_home</em><span class="codefrag">/scripts/metastore/upgrade/mysql/hive-schema-0.9.0.mysql.sql</span>
+</p>
+<p>
+<strong>Thrift Server Setup</strong>
+</p>
+<p>If you do not already have Hive running a metastore server using Thrift,
+    you can use the following instructions to set up and run one.  You may skip
+    this step if you already are using a Hive metastore server.</p>
+<p>Select a machine to install your Thrift server on.  For smaller and test
+    installations this can be the same machine as the database.  For the
+    purposes of these instructions we will refer to this machine as
+    <em>hcatsvr.acme.com</em>.</p>
+<p>If you have not already done so, install Hive 0.9 on this machine.  You
+    can use the
+    <a href="http://hive.apache.org/releases.html">binary distributions</a> 
+    provided by Hive or rpms available from
+    <a href="http://incubator.apache.org/bigtop/">Apache Bigtop</a>.  If you use
+    the Apache Hive binary distribution, select a directory, henceforth 
+    referred to as <span class="codefrag">hive_home</span>, and untar the distribution there.
+    If you use the rpms, <span class="codefrag">hive_home</span> will be
+    <span class="codefrag">/usr/lib/hive</span>.</p>
+<p>Install the MySQL Java connector libraries on <em>hcatsvr.acme.com</em>.
+    You can obtain these from
+    <a href="http://www.mysql.com/downloads/connector/j/5.1.html">MySQL's
+    download site</a>.</p>
+<p>Select a user to run the Thrift server as.  This user should not be a
+    human user, and must be able to act as a proxy for other users.  We suggest
+    the name "hive" for the user.  Throughout the rest of this documentation 
+    we will refer to this user as <em>hive</em>.  If necessary, add the user to 
+    <em>hcatsvr.acme.com</em>.</p>
+<p>Select a <em>root</em> directory for your installation of HCatalog.  This 
+    directory must be owned by the <em>hive</em> user.  We recommend
+    <span class="codefrag">/usr/local/hive</span>.  If necessary, create the directory.  You will
+    need to be the <em>hive</em> user for the operations described in the remainder
+    of this Thrift Server Setup section.</p>
+<p>Copy the HCatalog installation tarball into a temporary directory, and untar
+    it.  Then change directories into the new distribution and run the HCatalog
+    server installation script.  You will need to know the directory you chose
+    as <em>root</em> and the
+    directory you installed the MySQL Java connector libraries into (referred
+    to in the command below as <em>dbroot</em>).  You will also need your
+    <em>hadoop_home</em>, the directory where you have Hadoop installed, and 
+    the port number you wish HCatalog to operate on which you will use to set
+    <em>portnum</em>.</p>
+<p>
+<span class="codefrag">tar zxf hcatalog-0.4.0.tar.gz</span>
+</p>
+<p>
+<span class="codefrag">cd hcatalog-0.4.0</span>
+</p>
+<p>
+<span class="codefrag">share/hcatalog/scripts/hcat_server_install.sh -r </span><em>root</em><span class="codefrag"> -d </span><em>dbroot</em><span class="codefrag"> -h </span><em>hadoop_home</em><span class="codefrag"> -p </span><em>portnum</em>
+</p>
+<p>Now you need to edit your <em>hive_home</em><span class="codefrag">/conf/hive-site.xml</span> file.
+    Open this file in your favorite text editor.  The following table shows the
+    values you need to configure.</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+            
+<th colspan="1" rowspan="1">Parameter</th>
+            <th colspan="1" rowspan="1">Value to Set it to</th>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.local</td>
+            <td colspan="1" rowspan="1">false</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">javax.jdo.option.ConnectionURL</td>
+            <td colspan="1" rowspan="1">jdbc:mysql://<em>hostname</em>/hivemetastoredb?createDatabaseIfNotExist=true where <em>hostname</em> is the name of the machine you installed MySQL on.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">javax.jdo.option.ConnectionDriverName</td>
+            <td colspan="1" rowspan="1">com.mysql.jdbc.Driver</td>
+        
+</tr>
+
+        
+<tr>
+            
+<td colspan="1" rowspan="1">javax.jdo.option.ConnectionUserName</td>
+            <td colspan="1" rowspan="1">hive</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">javax.jdo.option.ConnectionPassword</td>
+            <td colspan="1" rowspan="1"><em>dbpassword</em> value you used in setting up the MySQL server
+            above.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.semantic.analyzer.factory.impl</td>
+            <td colspan="1" rowspan="1">org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hadoop.clientside.fs.operations</td>
+            <td colspan="1" rowspan="1">true</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.warehouse.dir</td>
+            <td colspan="1" rowspan="1">The directory can be a URI or an absolute file path. If it is an absolute file path, it will be resolved to a URI by the metastore:
+            <p>-- If default hdfs was specified in core-site.xml, path resolves to HDFS location. </p>
+            
+<p>-- Otherwise, path is resolved as local file: URI.</p>
+            
+<p>This setting becomes effective when creating new tables (it takes precedence over default DBS.DB_LOCATION_URI at the time of table creation).</p>
+            
+<p>You only need to set this if you have not yet configured Hive to run on your system.</p>
+            
+</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.uris</td>
+            <td colspan="1" rowspan="1">thrift://<em>hostname</em>:<em>portnum</em> where <em>hostname</em> is the name of the machine hosting the Thrift server, and <em>portnum</em> is the port number
+            used above in the installation script.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.execute.setugi</td>
+            <td colspan="1" rowspan="1">true</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.sasl.enabled</td>
+            <td colspan="1" rowspan="1">Set to true if you are using kerberos security with your Hadoop
+            cluster, false otherwise.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.kerberos.keytab.file</td>
+            <td colspan="1" rowspan="1">The path to the Kerberos keytab file containing the metastore
+            Thrift server's service principal.  Only required if you set
+            hive.metastore.sasl.enabled above to true.</td>
+        
+</tr>
+        
+<tr>
+            
+<td colspan="1" rowspan="1">hive.metastore.kerberos.principal</td>
+            <td colspan="1" rowspan="1">The service principal for the metastore Thrift server.  You can
+            reference your host as _HOST and it will be replaced with your
+            actual hostname.  Only required if you set
+            hive.metastore.sasl.enabled above to true.</td>
+        
+</tr>
+    
+</table>
+<p>You can now proceed to starting the server.</p>
+</div>
+
+  
+<a name="Starting+the+Server"></a>
+<h2 class="h3">Starting the Server</h2>
+<div class="section">
+<p>To start your server, HCatalog needs to know where Hive is installed.
+    This is communicated by setting the environment variable <span class="codefrag">HIVE_HOME</span>
+    to the location you installed Hive.  Start the HCatalog server by switching directories to
+    <em>root</em> and invoking <span class="codefrag">HIVE_HOME=</span><em>hive_home</em><span class="codefrag"> sbin/hcat_server.sh start</span>
+</p>
+</div>
+
+  
+<a name="Logging"></a>
+<h2 class="h3">Logging</h2>
+<div class="section">
+<p>Server activity logs are located in
+    <em>root</em><span class="codefrag">/var/log/hcat_server</span>.  Logging configuration is located at
+    <em>root</em><span class="codefrag">/conf/log4j.properties</span>.  Server logging uses
+    <span class="codefrag">DailyRollingFileAppender</span> by default. It will generate a new
+    file per day and does not expire old log files automatically.</p>
+</div>
+
+  
+<a name="Stopping+the+Server"></a>
+<h2 class="h3">Stopping the Server</h2>
+<div class="section">
+<p>To stop the HCatalog server, change directories to the <em>root</em>
+    directory and invoke <span class="codefrag">HIVE_HOME=</span><em>hive_home</em><span class="codefrag"> sbin/hcat_server.sh stop</span>
+</p>
+</div>
+
+  
+<a name="Client+Installation"></a>
+<h2 class="h3">Client Installation</h2>
+<div class="section">
+<p>Select a <em>root</em> directory for your installation of HCatalog client.
+    We recommend <span class="codefrag">/usr/local/hcat</span>.  If necessary, create the directory.</p>
+<p>Copy the HCatalog installation tarball into a temporary directory, and untar
+    it.</p>
+<p>
+<span class="codefrag">tar zxf hcatalog-0.4.0.tar.gz</span>
+</p>
+<p>Now you need to edit your <em>hive_home</em><span class="codefrag">/conf/hive-site.xml</span> file.
+    You can use the same file as on the server <strong>except the value of 
+    </strong><span class="codefrag">javax.jdo.option.ConnectionPassword</span><strong> should be
+    removed</strong>.  This avoids having the password available in plain text on
+    all of your clients.</p>
+<p>The HCatalog command line interface (CLI) can now be invoked as
+    <span class="codefrag">HIVE_HOME=</span><em>hive_home root</em><span class="codefrag">/bin/hcat</span>.</p>
+</div>
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011-2012 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.4.0/install.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/install.pdf?rev=1339782&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.4.0/install.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.4.0/linkmap.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/linkmap.html?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/linkmap.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/linkmap.html Thu May 17 18:35:47 2012
@@ -0,0 +1,239 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Site Linkmap Table of Contents</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.4.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">HCatalog</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Installation From Tarball</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="linkmap.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Site Linkmap Table of Contents</h1>
+<div id="front-matter"></div>
+<p>
+          This is a map of the complete site and its structure.
+        </p>
+<ul>
+<li>
+<a>HCatalog</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>site</em>
+</li>
+<ul>
+
+  
+<ul>
+<li>
+<a>HCatalog</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>docs</em>
+</li>
+<ul> 
+    
+<ul>
+<li>
+<a href="index.html">Overview</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="install.html">Installation From Tarball</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="loadstore.html">Load &amp; Store Interfaces</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="cli.html">Command Line Interface </a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="supportedformats.html">Storage Formats</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="dynpartition.html">Dynamic Partitioning</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="notification.html">Notification</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>index</em>
+</li>
+</ul>    
+
+    
+<ul>
+<li>
+<a href="api/index.html">API Docs</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>api</em>
+</li>
+</ul>
+  
+</ul>
+</ul>  
+
+
+</ul>
+</ul>
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011-2012 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.4.0/linkmap.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/linkmap.pdf?rev=1339782&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.4.0/linkmap.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.4.0/loadstore.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/loadstore.html?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/loadstore.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/loadstore.html Thu May 17 18:35:47 2012
@@ -0,0 +1,555 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Load and Store Interfaces</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.4.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Installation From Tarball</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Load &amp; Store Interfaces</div>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menuitem">
+<a href="notification.html">Notification</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="loadstore.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Load and Store Interfaces</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Set+Up">Set Up</a>
+</li>
+<li>
+<a href="#HCatLoader">HCatLoader</a>
+<ul class="minitoc">
+<li>
+<a href="#Usage">Usage</a>
+</li>
+<li>
+<a href="#HCatalog+Data+Types">HCatalog Data Types</a>
+</li>
+<li>
+<a href="#Running+Pig+with+HCatalog">Running Pig with HCatalog</a>
+</li>
+<li>
+<a href="#Load+Examples">Load Examples</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#HCatStorer">HCatStorer</a>
+<ul class="minitoc">
+<li>
+<a href="#Usage-N1013D">Usage</a>
+</li>
+<li>
+<a href="#Store+Examples">Store Examples</a>
+</li>
+<li>
+<a href="#HCatalog+Data+Types-N1017D">HCatalog Data Types</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+ 
+ <!-- ==================================================================== --> 
+  
+<a name="Set+Up"></a>
+<h2 class="h3">Set Up</h2>
+<div class="section">
+<p>The HCatLoader and HCatStorer interfaces are used with Pig scripts to read and write data in HCatalog managed tables.</p>
+</div>
+  
+      
+<!-- ==================================================================== -->
+     
+<a name="HCatLoader"></a>
+<h2 class="h3">HCatLoader</h2>
+<div class="section">
+<p>HCatLoader is used with Pig scripts to read data from HCatalog managed tables.</p>
+<a name="Usage"></a>
+<h3 class="h4">Usage</h3>
+<p>HCatLoader is accessed via a Pig load statement.</p>
+<pre class="code">
+A = LOAD 'tablename' USING org.apache.hcatalog.pig.HCatLoader(); 
+</pre>
+<p>
+<strong>Assumptions</strong>
+</p>
+<p>You must specify the table name in single quotes: LOAD 'tablename'. If you are using a non-default database you must specify your input as 'dbname.tablename'. If you are using Pig 0.9.2 or earlier, you must create your database and table prior to running the Pig script. Beginning with Pig 0.10 you can issue these create commands in Pig using the SQL command.</p>
+<p>The Hive metastore lets you create tables without specifying a database; if you
+    created tables this way, then the database name is 'default' and is not required when
+    specifying the table for HCatLoader. </p>
+<p>If the table is partitioned, you can indicate which partitions to scan by immediately following the load statement with a partition filter statement 
+    (see <strong>Examples</strong>). </p>
+<a name="HCatalog+Data+Types"></a>
+<h3 class="h4">HCatalog Data Types</h3>
+<p>Restrictions apply to the types of columns HCatLoader can read.</p>
+<p>HCatLoader  can read <strong>only</strong> the data types listed in the table. 
+The table shows how Pig will interpret the HCatalog data type.</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>
+<strong>HCatalog Data Type</strong>
+</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>
+<strong>Pig Data Type</strong>
+</p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>primitives (int, long, float, double, string) </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>int, long, float, double, string to chararray </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>map (key type should be string, value type must be string)</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>map </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>List&lt;any type&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>bag </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>struct&lt;any type fields&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>tuple </p>
+            
+</td>
+    
+</tr>
+ 
+</table>
+<a name="Running+Pig+with+HCatalog"></a>
+<h3 class="h4">Running Pig with HCatalog</h3>
+<p>Pig does not automatically pick up HCatalog jars. You will need to tell Pig where your HCatalog jars are.
+These include the Hive jars used by the HCatalog client. To do this, you must define the environment
+variable PIG_CLASSPATH with the appropriate jars. HCat can tell you the jars it needs. In order to do this it
+needs to know where Hadoop is installed. Also, you need to tell Pig the URI for your metastore, in the PIG_OPTS
+variable. In the case where you have installed Hadoop and HCatalog via tar, you can do:</p>
+<pre class="code">
+export HADOOP_HOME=&lt;path_to_hadoop_install&gt;
+export HCAT_HOME=&lt;path_to_hcat_install&gt;
+export PIG_CLASSPATH=$HCAT_HOME/share/hcatalog/hcatalog-0.4.0.jar:$HIVE_HOME/lib/hive-metastore-0.9.0.jar:
+$HIVE_HOME/lib/libthrift-0.7.0.jar:$HIVE_HOME/lib/hive-exec-0.9.0.jar:$HIVE_HOME/lib/libfb303-0.7.0.jar:
+$HIVE_HOME/lib/jdo2-api-2.3-ec.jar:$HIVE_HOME/conf:$HADOOP_HOME/conf:$HIVE_HOME/lib/slf4j-api-1.6.1.jar
+
+export PIG_OPTS=-Dhive.metastore.uris=thrift://&lt;hostname&gt;:&lt;port&gt;
+
+&lt;path_to_pig_install&gt;/bin/pig -Dpig.additional.jars=$HCAT_HOME/share/hcatalog/hcatalog-0.4.0.jar:
+$HIVE_HOME/lib/hive-metastore-0.9.0.jar:$HIVE_HOME/lib/libthrift-0.7.0.jar:$HIVE_HOME/lib/hive-exec-0.9.0.jar:
+$HIVE_HOME/lib/libfb303-0.7.0.jar:$HIVE_HOME/lib/jdo2-api-2.3-ec.jar:$HIVE_HOME/lib/slf4j-api-1.6.1.jar &lt;script.pig&gt;
+</pre>
+<p>
+<strong>Authentication</strong>
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+	
+<tr>
+	
+<td colspan="1" rowspan="1">
+<p>If you are using a secure cluster and a failure results in a message like "2010-11-03 16:17:28,225 WARN hive.metastore ... - Unable to connect metastore with URI thrift://..." in /tmp/&lt;username&gt;/hive.log, then make sure you have run "kinit &lt;username&gt;@FOO.COM" to get a Kerberos ticket and to be able to authenticate to the HCatalog server. </p>
+</td>
+	
+</tr>
+
+</table>
+<a name="Load+Examples"></a>
+<h3 class="h4">Load Examples</h3>
+<p>This load statement will load all partitions of the specified table.</p>
+<pre class="code">
+/* myscript.pig */
+A = LOAD 'tablename' USING org.apache.hcatalog.pig.HCatLoader(); 
+...
+...
+</pre>
+<p>If only some partitions of the specified table are needed, include a partition filter statement <strong>immediately</strong> following the load statement in the data flow. (In the script, however, a filter statement might not immediately follow its load statement.) The filter statement can include conditions on partition as well as non-partition columns.</p>
+<pre class="code">
+/* myscript.pig */
+A = LOAD 'tablename' USING  org.apache.hcatalog.pig.HCatLoader();
+
+-- date is a partition column; age is not
+B = filter A by date == '20100819' and age &lt; 30; 
+
+-- both date and country are partition columns
+C = filter A by date == '20100819' and country == 'US'; 
+...
+...
+</pre>
+<p>To scan a whole table, for example:</p>
+<pre class="code">
+a = load 'student_data' using org.apache.hcatalog.pig.HCatLoader();
+b = foreach a generate name, age;
+</pre>
+<p>Notice that the schema is automatically provided to Pig; there's no need to declare name and age as fields, as if
+you were loading from a file.</p>
+<p>To scan a single partition of the table web_logs, for example, partitioned by the column datestamp:</p>
+<pre class="code">
+a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by datestamp == '20110924';
+</pre>
+<p>Pig will push the datestamp filter shown here to HCatalog, so that HCatalog knows to just scan the partition where
+datestamp = '20110924'. You can combine this filter with others via 'and':</p>
+<pre class="code">
+a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by datestamp == '20110924' and user is not null;
+</pre>
+<p>Pig will split the above filter, pushing the datestamp portion to HCatalog and retaining the <span class="codefrag">user is not null</span> part
+to apply itself. You can also give a more complex filter to retrieve a set of partitions.</p>
+<p>
+<strong>Filter Operators</strong>
+</p>
+<p>A filter can contain the operators 'and', 'or', '()', '==', '!=', '&lt;', '&gt;', '&lt;='
+and '&gt;='.</p>
+<p>For example:</p>
+<pre class="code">
+a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by datestamp &gt; '20110924';
+</pre>
+<p>A complex filter can have various combinations of operators, such as:</p>
+<pre class="code">
+a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by datestamp == '20110924' or datestamp == '20110925';
+</pre>
+<p>These two examples have the same effect:</p>
+<pre class="code">
+a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by datestamp &gt;= '20110924' and datestamp &lt;= '20110925';
+</pre>
+<pre class="code">
+a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader();
+b = filter a by datestamp &lt;= '20110925' and datestamp &gt;= '20110924';
+</pre>
+</div> 
+	
+<!-- ==================================================================== -->	
+	
+<a name="HCatStorer"></a>
+<h2 class="h3">HCatStorer</h2>
+<div class="section">
+<p>HCatStorer is used with Pig scripts to write data to HCatalog managed tables.</p>
+<a name="Usage-N1013D"></a>
+<h3 class="h4">Usage</h3>
+<p>HCatStorer is accessed via a Pig store statement.</p>
+<pre class="code">
+A = LOAD ...
+B = FOREACH A ...
+...
+...
+my_processed_data = ...
+
+STORE my_processed_data INTO 'tablename' USING
+ org.apache.hcatalog.pig.HCatStorer();
+</pre>
+<p>
+<strong>Assumptions</strong>
+</p>
+<p>You must specify the table name in single quotes: STORE ... INTO 'tablename'. Both the database and table must be created prior to running your Pig script. If you are using a non-default database you must specify your output as 'dbname.tablename'. If you are using Pig 0.9.2 or earlier, you must create your database and table prior to running the Pig script. Beginning with Pig 0.10 you can issue these create commands in Pig using the SQL command. </p>
+<p>The Hive metastore lets you create tables without specifying a database; if you created
+tables this way, then the database name is 'default' and you do not need to specify the
+database name in the store statement. </p>
+<p>For the USING clause, you can have a string argument that represents key/value pairs
+for partition. This is a mandatory argument when you are writing to a partitioned table
+and the partition column is not in the output column.  The values for partition keys
+should NOT be quoted.</p>
+<p>If partition columns are present in data they need not be specified as a STORE argument. Instead HCatalog will use these values to place records in the appropriate partition(s). It is valid to specify some partition keys in the STORE statement and have other partition keys in the data.</p>
+<p></p>
+<p></p>
+<a name="Store+Examples"></a>
+<h3 class="h4">Store Examples</h3>
+<p>You can write to a non-partitioned table simply by using HCatStorer.  The contents of the table will be overwritten:</p>
+<pre class="code">store z into 'student_data' using org.apache.hcatalog.pig.HCatStorer();</pre>
+<p>To add one new partition to a partitioned table, specify the partition value in the store function.  Pay careful
+attention to the quoting, as the whole string must be single quoted and separated with an equals sign:</p>
+<pre class="code">store z into 'web_data' using org.apache.hcatalog.pig.HCatStorer('datestamp=20110924');</pre>
+<p>To write into multiple partitions at once, make sure that the partition column is present in your data, then call
+HCatStorer with no argument:</p>
+<pre class="code">store z into 'web_data' using org.apache.hcatalog.pig.HCatStorer(); 
+  -- datestamp must be a field in the relation z</pre>
+<a name="HCatalog+Data+Types-N1017D"></a>
+<h3 class="h4">HCatalog Data Types</h3>
+<p>Restrictions apply to the types of columns HCatStorer can write.</p>
+<p>HCatStorer can write <strong>only</strong> the data types listed in the table. 
+The table shows how Pig will interpret the HCatalog data type.</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>
+<strong>HCatalog Data Type</strong>
+</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>
+<strong>Pig Data Type</strong>
+</p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>primitives (int, long, float, double, string) </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>int, long, float, double, string to chararray </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>map (key type should be string, value type must be string)</p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>map </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>List&lt;any type&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>bag </p>
+            
+</td>
+    
+</tr>
+    
+<tr>
+            
+<td colspan="1" rowspan="1">
+               
+<p>struct&lt;any type fields&gt; </p>
+            
+</td>
+            <td colspan="1" rowspan="1">
+               
+<p>tuple </p>
+            
+</td>
+    
+</tr>
+ 
+</table>
+</div>
+	
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011-2012 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.4.0/loadstore.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/loadstore.pdf?rev=1339782&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.4.0/loadstore.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.4.0/locationmap.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/locationmap.xml?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/locationmap.xml (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/locationmap.xml Thu May 17 18:35:47 2012
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<locationmap xmlns="http://apache.org/forrest/locationmap/1.0">
+  <components>
+    <matchers default="lm">
+      <matcher name="lm" src="org.apache.forrest.locationmap.WildcardLocationMapHintMatcher"/>
+    </matchers>
+<!--
+      * Can contain any sitemap selector with the following syntax. * 
+    <selectors default="exists">
+      <selector name="exists" logger="sitemap.selector.exists"  
+          src="org.apache.forrest.sourceexists.SourceExistsSelector" />
+    </selectors>
+    -->
+  </components>
+<!--
+    * Can contain a mount statement as a sibling to components and locator *
+    <mount src="somelocation.xml"/>
+  -->
+  <locator>
+<!--
+      * Can contain a mount within a selector where a selector is valid. 
+    <select>
+      <mount src="somelocation.xml"/>
+    </select>
+    -->
+    <match pattern="project.rewrite.**">
+      <location src="http://cocoon.apache.org/{1}.html"/>
+    </match>
+    <match pattern="project.remote.**.xml">
+      <location src="http://svn.apache.org/repos/asf/forrest/trunk/main/fresh-site/src/documentation/content/xdocs/{1}.xml"/>
+    </match>
+<!-- 
+      * Can use a selector inside a match.  *
+    <match pattern="somepattern/**">
+      <select>
+        <location src="first-location-attempted"/>
+        <location src="second-location-attempted"/>
+        <location src="third-location-attempted"/>
+      </select>
+    </match>
+    -->
+<!--
+     To locate all your source documents in a slide repository you can do:
+
+    <match pattern="tabs.xml">
+      <location src="http://127.0.0.1:8080/slide/files/tabs.xml"/>
+    </match>
+    <match pattern="site.xml">
+      <location src="http://127.0.0.1:8080/slide/files/site.xml"/>
+    </match>
+    <match pattern="**.xml">
+      <location src="http://127.0.0.1:8080/slide/files/{1}.xml"/>
+    </match>
+    -->
+  </locator>
+</locationmap>

Added: incubator/hcatalog/site/publish/docs/r0.4.0/notification.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/notification.html?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/notification.html (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/notification.html Thu May 17 18:35:47 2012
@@ -0,0 +1,404 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.9">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Notification</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat.jpg" title=""></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogoA1">
+<a href=""><img class="logoImage" alt="HCatalog" src="images/hcat-box.jpg" title="A table abstraction on top of data for use with Java MapReduce programs, Pig scripts and Hive queries."></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">HCatalog 0.4.0 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">HCatalog</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="install.html">Installation From Tarball</a>
+</div>
+<div class="menuitem">
+<a href="loadstore.html">Load &amp; Store Interfaces</a>
+</div>
+<div class="menuitem">
+<a href="inputoutput.html">Input &amp; Output Interfaces </a>
+</div>
+<div class="menuitem">
+<a href="cli.html">Command Line Interface </a>
+</div>
+<div class="menuitem">
+<a href="supportedformats.html">Storage Formats</a>
+</div>
+<div class="menuitem">
+<a href="dynpartition.html">Dynamic Partitioning</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Notification</div>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="notification.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Notification</h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Notification+for+a+New+Partition">Notification for a New Partition</a>
+</li>
+<li>
+<a href="#Notification+for+a+Set+of+Partitions">Notification for a Set of Partitions</a>
+</li>
+<li>
+<a href="#Server+Configuration">Server Configuration</a>
+</li>
+</ul>
+</div>
+</div>
+  
+ 
+<p>Since version 0.2, HCatalog provides notifications for certain events happening in the system. This way applications such as Oozie can wait for those events and schedule the work that depends on them. The current version of HCatalog supports two kinds of events: </p>
+
+<ul>
+
+<li>Notification when a new partition is added</li>
+
+<li>Notification when a set of partitions is added</li>
+
+</ul>
+
+
+<p>No additional work is required to send a notification when a new partition is added: the existing addPartition call will send the notification message.</p>
+
+
+<a name="Notification+for+a+New+Partition"></a>
+<h2 class="h3">Notification for a New Partition</h2>
+<div class="section">
+<p>To receive notification that a new partition has been added, you need to follow these three steps.</p>
+<p>1. To start receiving messages, create a connection to a message bus as shown here:</p>
+<pre class="code">
+ConnectionFactory connFac = new ActiveMQConnectionFactory(amqurl);
+Connection conn = connFac.createConnection();
+conn.start();
+ </pre>
+<p>2. Subscribe to a topic you are interested in. When subscribing on a message bus, you need to subscribe to a particular topic to receive the messages that are being delivered on that topic. </p>
+<ul>
+  
+<li>  
+  
+<p>The topic name corresponding to a particular table is stored in table properties and can be retrieved using the following piece of code: </p>
+ 
+<pre class="code">
+HiveMetaStoreClient msc = new HiveMetaStoreClient(hiveConf);
+String topicName = msc.getTable("mydb", "myTbl").getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME);
+ </pre>
+ 
+</li>
+  
+  
+<li>  
+  
+<p>Use the topic name to subscribe to a topic as follows: </p>
+ 
+<pre class="code">
+Session session = conn.createSession(true, Session.SESSION_TRANSACTED);
+Destination hcatTopic = session.createTopic(topicName);
+MessageConsumer consumer = session.createConsumer(hcatTopic);
+consumer.setMessageListener(this);
+ </pre>
+ 
+</li>
+  
+</ul>
+<p>3. To start receiving messages you need to implement the JMS interface <span class="codefrag">MessageListener</span>, which, in turn, will make you implement the method <span class="codefrag">onMessage(Message msg)</span>. This method will be called whenever a new message arrives on the message bus. The message contains a partition object representing the corresponding partition, which can be retrieved as shown here: </p>
+<pre class="code">
+@Override
+   public void onMessage(Message msg) {
+      // We are interested in only add_partition events on this table.
+      // So, check message type first.
+      if(msg.getStringProperty(HCatConstants.HCAT_EVENT).equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)){
+          Object obj = (((ObjectMessage)msg).getObject());
+      }
+   }
+ </pre>
+<p>You need to have a JMS jar in your classpath to make this work. Additionally, you need to have a JMS provider&rsquo;s jar in your classpath. HCatalog is tested with ActiveMQ as a JMS provider, although any JMS provider can be used. ActiveMQ can be obtained from: http://activemq.apache.org/activemq-550-release.html .</p>
+</div>
+
+
+<a name="Notification+for+a+Set+of+Partitions"></a>
+<h2 class="h3">Notification for a Set of Partitions</h2>
+<div class="section">
+<p>Sometimes a user wants to wait until a collection of partitions is finished. For example, you may want to start processing after all partitions for a day are done. However, HCatalog has no notion of collections or hierarchies of partitions. To support this, HCatalog allows data writers to signal when they are finished writing a collection of partitions. Data readers may wait for this signal before beginning to read.</p>
+<p>The example code below illustrates how to send a notification when a set of partitions has been added.</p>
+<p>To signal, a data writer does this:</p>
+<pre class="code">
+HiveMetaStoreClient msc = new HiveMetaStoreClient(conf);
+
+// Create a map, specifying partition key names and values
+Map&lt;String,String&gt; partMap = new HashMap&lt;String, String&gt;();
+partMap.put("date","20110711");
+partMap.put("country","*");
+
+// Mark the partition as "done"
+msc.markPartitionForEvent("mydb", "mytbl", partMap, PartitionEventType.LOAD_DONE);
+</pre>
+<p>To receive this notification, the consumer needs to do the following:</p>
+<ol>
+
+<li>Repeat steps one and two from above to establish the connection to the notification system and to subscribe to the topic.</li>
+
+<li>Receive the notification as shown in this example:
+<pre class="code">
+HiveMetaStoreClient msc = new HiveMetaStoreClient(conf);
+
+// Create a map, specifying partition key names and values
+Map&lt;String,String&gt; partMap = new HashMap&lt;String, String&gt;();
+partMap.put("date","20110711");
+partMap.put("country","*");
+
+// Mark the partition as "done"
+msc.markPartitionForEvent("mydb", "mytbl", partMap, PartitionEventType.LOAD_DONE);
+</pre>
+
+</li>
+
+</ol>
+<p>If the consumer has registered with the message bus and is currently live, it will get the callback from the message bus once the producer marks the partition as "done".  Alternatively, the consumer can ask explicitly for a particular partition from the metastore. The following code illustrates the usage from a consumer's perspective:</p>
+<pre class="code">
+// Enquire to metastore whether a particular partition has been marked or not.
+boolean marked = msc.isPartitionMarkedForEvent("mydb", "mytbl", partMap, PartitionEventType.LOAD_DONE);
+
+// Or register to a message bus and get asynchronous callback.
+ConnectionFactory connFac = new ActiveMQConnectionFactory(amqurl);
+Connection conn = connFac.createConnection();
+conn.start();
+Session session = conn.createSession(true, Session.SESSION_TRANSACTED);
+Destination hcatTopic = session.createTopic(topic);
+MessageConsumer consumer = session.createConsumer(hcatTopic);
+consumer.setMessageListener(this);
+
+
+public void onMessage(Message msg) {
+
+                                
+  MapMessage mapMsg = (MapMessage)msg;
+  Enumeration&lt;String&gt; keys = mapMsg.getMapNames();
+  
+  // Enumerate over all keys. This will print key-value pairs specifying the  
+  // particular partition which was marked done. In this case, it will print:
+  // date : 20110711
+  // country: *
+
+  while(keys.hasMoreElements()){
+    String key = keys.nextElement();
+    System.out.println(key + " : " + mapMsg.getString(key));
+  }
+  System.out.println("Message: "+msg);
+</pre>
+</div>
+
+
+<a name="Server+Configuration"></a>
+<h2 class="h3">Server Configuration</h2>
+<div class="section">
+<p>To enable notification, you need to configure the server (see below). </p>
+<p>To disable notification, you need to leave <span class="codefrag">hive.metastore.event.listeners</span> blank or remove it from <span class="codefrag">hive-site.xml</span>.
+</p>
+<p>
+<strong>Enable JMS Notifications</strong>
+</p>
+<p>You need to make (add/modify) the following changes to the hive-site.xml file of your HCatalog server to turn on notifications.</p>
+<pre class="code">
+&lt;property&gt;
+&lt;name&gt;hive.metastore.event.expiry.duration&lt;/name&gt;
+&lt;value&gt;300L&lt;/value&gt;
+&lt;description&gt;Duration after which events expire from events table (in seconds)&lt;/description&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+&lt;name&gt;hive.metastore.event.clean.freq&lt;/name&gt;
+&lt;value&gt;360L&lt;/value&gt;
+&lt;description&gt;Frequency at which timer task runs to purge expired events in metastore(in seconds).&lt;/description&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+&lt;name&gt;msgbus.brokerurl&lt;/name&gt;
+&lt;value&gt;tcp://localhost:61616&lt;/value&gt;
+&lt;description&gt;&lt;/description&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+&lt;name&gt;msgbus.username&lt;/name&gt;
+&lt;value&gt;&lt;/value&gt;
+&lt;description&gt;&lt;/description&gt;
+&lt;/property&gt;
+
+&lt;property&gt;
+&lt;name&gt;msgbus.password&lt;/name&gt;
+&lt;value&gt;&lt;/value&gt;
+&lt;description&gt;&lt;/description&gt;
+&lt;/property&gt;
+</pre>
+<p>For the server to start with support for notifications, the following must be in the classpath:</p>
+<ul>
+	
+<li>(a) activemq jar </li>
+    
+<li>(b) jndi.properties file with properties suitably configured for notifications</li>
+
+</ul>
+<p></p>
+<p>Then, follow these steps:</p>
+<ol>
+
+<li>HCatalog server start script is $YOUR_HCAT_SERVER/share/hcatalog/scripts/hcat_server_start.sh</li>
+
+<li>This script expects classpath to be set by the AUX_CLASSPATH environment variable.</li>
+
+<li>Therefore set AUX_CLASSPATH to satisfy (a) and (b) above.</li>
+
+<li>jndi.properties file is located at $YOUR_HCAT_SERVER/etc/hcatalog/jndi.properties</li>
+
+<li>You need to uncomment and set the following properties in this file: -
+<ul>
+
+<li>java.naming.factory.initial = org.apache.activemq.jndi.ActiveMQInitialContextFactory</li>
+
+<li>java.naming.provider.url = tcp://localhost:61616 (this is activemq url in your setup)
+</li>
+
+</ul>
+
+</li>
+
+</ol>
+<p>
+<strong>Topic Names</strong>
+</p>
+<p>If tables are created while the server is configured for notifications, a default topic name is automatically set as a table property. To use notifications with tables created previously (in previous HCatalog installations or prior to enabling notifications), you will have to set a topic name manually. For example: </p>
+<pre class="code">
+$YOUR_HCAT_CLIENT_HOME/bin/hcat -e "ALTER TABLE access SET hcat.msgbus.topic.name=$TOPIC_NAME"
+</pre>
+<p>You then need to configure your activemq Consumer(s) to listen for messages on the topic you gave in $TOPIC_NAME. A good default policy is TOPIC_NAME = "$database.$table" (that is a literal dot).</p>
+</div>
+    
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2011-2012 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: incubator/hcatalog/site/publish/docs/r0.4.0/notification.pdf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/notification.pdf?rev=1339782&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/hcatalog/site/publish/docs/r0.4.0/notification.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_de.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_de.xml?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_de.xml (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_de.xml Thu May 17 18:35:47 2012
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<catalogue>
+  <message key="Font size:">Schriftgrösse:</message>
+  <message key="Last Published:">Zuletzt veröffentlicht:</message>
+  <message key="Search">Suche:</message>
+  <message key="Search the site with">Suche auf der Seite mit</message>
+</catalogue>

Added: incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_en_US.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_en_US.xml?rev=1339782&view=auto
==============================================================================
--- incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_en_US.xml (added)
+++ incubator/hcatalog/site/publish/docs/r0.4.0/skin/CommonMessages_en_US.xml Thu May 17 18:35:47 2012
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<catalogue>
+  <message  key="Font size:">Font size:</message>
+  <message key="Last Published:">Last Published:</message>
+  <message key="Search">Search</message>
+  <message key="Search the site with">Search site with</message>
+</catalogue>



Mime
View raw message