incubator-hcatalog-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1144122 - in /incubator/hcatalog/trunk: ./ src/docs/ src/docs/src/documentation/content/xdocs/ src/java/org/apache/hcatalog/mapreduce/ src/java/org/apache/hcatalog/pig/
Date Fri, 08 Jul 2011 01:35:15 GMT
Author: hashutosh
Date: Fri Jul  8 01:35:15 2011
New Revision: 1144122

URL: http://svn.apache.org/viewvc?rev=1144122&view=rev
Log:
HCATALOG-54 Javadoc is not being built as part of HCatalog docs

Added:
    incubator/hcatalog/trunk/src/docs/overview.html
Modified:
    incubator/hcatalog/trunk/CHANGES.txt
    incubator/hcatalog/trunk/build.xml
    incubator/hcatalog/trunk/src/docs/src/documentation/content/xdocs/site.xml
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/JobInfo.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/PigHCatUtil.java

Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Fri Jul  8 01:35:15 2011
@@ -14,6 +14,7 @@ Trunk (unreleased changes)
     (Krishna Kumar via macyang)
     
   IMPROVEMENTS
+    HCAT-54. Javadoc is not being built as part of HCatalog docs (hashutosh) 
 
     HCAT-35. HCatalog fails to compile with Pig 0.9 (hashutosh)
 

Modified: incubator/hcatalog/trunk/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/build.xml?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/build.xml (original)
+++ incubator/hcatalog/trunk/build.xml Fri Jul  8 01:35:15 2011
@@ -339,7 +339,7 @@
   Docs Section
   ================================================================================
   -->
-  <target name="docs" depends="forrest.check" description="Generate forrest-based documentation.
To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the
command line." if="forrest.home">
+  <target name="docs" depends="javadoc, forrest.check" description="Generate forrest-based
documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt;
on the command line." if="forrest.home">
     <exec dir="${docs.src}" executable="${forrest.home}/bin/forrest"
           failonerror="true">
     </exec>
@@ -352,6 +352,18 @@
     <fail message="'forrest.home' is not defined. Please pass -Dforrest.home=&lt;base
of Apache Forrest installation&gt; to Ant on the command-line." />
   </target>
 
+  <target name="javadoc" depends="jar" description="Create documentation">
+      <mkdir dir="${build.javadoc}" />
+      <javadoc overview="${src.dir}/../docs/overview.html" packagenames="org.apache.hcatalog.*"
destdir="${build.javadoc}" author="true" version="true" use="true" windowtitle="HCatalog ${hcatalog.version}
API" doctitle="HCatalog ${hcatalog.version} API">
+          <packageset dir="${src.dir}" />
+          <classpath>
+              <path refid="classpath" />
+          </classpath>
+          <group title="hcatalog" packages="org.apache.hcatalog.*" />
+      </javadoc>
+  </target>
+
+
   <!--
   ===============================================================================
   Distribution Section

Added: incubator/hcatalog/trunk/src/docs/overview.html
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/docs/overview.html?rev=1144122&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/docs/overview.html (added)
+++ incubator/hcatalog/trunk/src/docs/overview.html Fri Jul  8 01:35:15 2011
@@ -0,0 +1,116 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<title>Overview </title>
+</head>
+<body> 
+<h1>Overview </h1>
+<div id="front-matter">
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#HCatalog">HCatalog </a>
+</li>
+<li>
+<a href="#HCatalog+Architecture">HCatalog Architecture</a>
+<ul class="minitoc">
+<li>
+<a href="#Interfaces">Interfaces</a>
+</li>
+<li>
+<a href="#Data+Model">Data Model</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Data+Flow+Example">Data Flow Example</a>
+</li>
+</ul>
+</div>
+</div>
+   
+<a name="HCatalog"></a>
+<h2 class="h3">HCatalog </h2>
+<div class="section">
+<p>HCatalog is a table management and storage management layer for Hadoop that enables
users with different data processing tools &ndash; Pig, MapReduce, Hive, Streaming &ndash;
to more easily read and write data on the grid. HCatalog&rsquo;s table abstraction presents
users with a relational view of data in the Hadoop distributed file system (HDFS) and ensures
that users need not worry about where or in what format their data is stored &ndash; RCFile
format, text files, sequence files. </p>
+<p>(Note: In this release, Streaming is not supported. Also, HCatalog supports only
writing RCFile formatted files and only reading PigStorage formatted text files.)</p>
+<p></p>
+     
+      
+      
+<a name="HCatalog+Architecture"></a>
+<h2 class="h3">HCatalog Architecture</h2>
+<div class="section">
+<p>HCatalog is built on top of the Hive metastore and incorporates components from
the Hive DDL. HCatalog provides read and write interfaces for Pig and MapReduce and a command
line interface for data definitions.</p>
+<p>(Note: HCatalog notification is not available in this release.)</p>
+<p></p>
+<a name="Interfaces"></a>
+<h3 class="h4">Interfaces</h3>
+<p>The HCatalog interface for Pig &ndash; HCatLoader and HCatStorer &ndash;
is an implementation of the Pig load and store interfaces. HCatLoader accepts a table to read
data from; you can indicate which partitions to scan by immediately following the load statement
with a partition filter statement. HCatStorer accepts a table to write to and a specification
of partition keys to create a new partition. Currently HCatStorer only supports writing to
one partition. HCatLoader and HCatStorer are implemented on top of HCatInputFormat and HCatOutputFormat
respectively.</p>
+<p>The HCatalog interface for MapReduce &ndash; HCatInputFormat and HCatOutputFormat
&ndash; is an implementation of Hadoop InputFormat and OutputFormat. HCatInputFormat accepts
a table to read data from and a selection predicate to indicate which partitions to scan.
HCatOutputFormat accepts a table to write to and a specification of partition keys to create
a new partition. Currently HCatOutputFormat only supports writing to one partition.</p>
+<p>
+<strong>Note:</strong> Currently there is no Hive-specific interface. Since HCatalog
uses Hive's metastore, Hive can read data in HCatalog directly as long as a SerDe for that
data already exists. In the future we plan to write a HCatalogSerDe so that users won't need
storage-specific SerDes and so that Hive users can write data to HCatalog. Currently, this
is supported - if a Hive user writes data in the RCFile format, it is possible to read the
data through HCatalog. </p>
+<p>Data is defined using HCatalog's command line interface (CLI). The HCatalog CLI
supports most of the DDL portion of Hive's query language, allowing users to create, alter,
drop tables, etc. The CLI also supports the data exploration part of the Hive command line,
such as SHOW TABLES, DESCRIBE TABLE, etc.</p>
+<a name="Data+Model"></a>
+<h3 class="h4">Data Model</h3>
+<p>HCatalog presents a relational view of data in HDFS. Data is stored in tables and
these tables can be placed in databases. Tables can also be hash partitioned on one or more
keys; that is, for a given value of a key (or set of keys) there will be one partition that
contains all rows with that value (or set of values). For example, if a table is partitioned
on date and there are three days of data in the table, there will be three partitions in the
table. New partitions can be added to a table, and partitions can be dropped from a table.
Partitioned tables have no partitions at create time. Unpartitioned tables effectively have
one default partition that must be created at table creation time. There is no guaranteed
read consistency when a partition is dropped.</p>
+<p>Partitions contain records. Once a partition is created records cannot be added
to it, removed from it, or updated in it. (In the future some ability to integrate changes
to a partition will be added.) Partitions are multi-dimensional and not hierarchical. Records
are divided into columns. Columns have a name and a datatype. HCatalog supports the same datatypes
as Hive. </p>
+</div>
+     
+  
+<a name="Data+Flow+Example"></a>
+<h2 class="h3">Data Flow Example</h2>
+<div class="section">
+<p>This simple data flow example shows how HCatalog is used to move data from the grid
into a database. 
+  From the database, the data can then be analyzed using Hive.</p>
+<p>
+<strong>First</strong> Joe in data acquisition uses distcp to get data onto the
grid.</p>
+<pre class="code">
+hadoop distcp file:///file.dat hdfs://data/rawevents/20100819/data
+
+hcat "alter table rawevents add partition 20100819 hdfs://data/rawevents/20100819/data"
+</pre>
+<p>
+<strong>Second</strong> Sally in data processing uses Pig to cleanse and prepare
the data.</p>
+<p>Without HCatalog, Sally must be manually informed by Joe that data is available,
or use Oozie and poll on HDFS.</p>
+<pre class="code">
+A = load '/data/rawevents/20100819/data' as (alpha:int, beta:chararray, &hellip;);
+B = filter A by bot_finder(zeta) = 0;
+&hellip;
+store Z into 'data/processedevents/20100819/data';
+</pre>
+<p>With HCatalog, Oozie will be notified by HCatalog that data is available and can then
start the Pig job.</p>
+<pre class="code">
+A = load 'rawevents' using HCatLoader;
+B = filter A by date = '20100819' and bot_finder(zeta) = 0;
+&hellip;
+store Z into 'processedevents' using HCatStorer("date=20100819");
+</pre>
+<p>
+<strong>Third</strong> Robert in client management uses Hive to analyze his clients'
results.</p>
+<p>Without HCatalog, Robert must alter the table to add the required partition. </p>
+<pre class="code">
+alter table processedevents add partition 20100819 hdfs://data/processedevents/20100819/data
+
+select advertiser_id, count(clicks)
+from processedevents
+where date = '20100819' 
+group by advertiser_id;
+</pre>
+<p>With HCatalog, Robert does not need to modify the table structure.</p>
+<pre class="code">
+select advertiser_id, count(clicks)
+from processedevents
+where date = &lsquo;20100819&rsquo; 
+group by advertiser_id;
+</pre>
+</div>
+  
+<div class="copyright">
+        Copyright &copy;
+         2011 <a href="http://www.apache.org/licenses/">The Apache Software Foundation</a>
+</div>
+</div>
+</body>
+</html>

Modified: incubator/hcatalog/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ incubator/hcatalog/trunk/src/docs/src/documentation/content/xdocs/site.xml Fri Jul  8
01:35:15 2011
@@ -45,6 +45,7 @@ See http://forrest.apache.org/docs/linki
     <index label="Cmd Line Interface " href="cli.html" />
     <index label="Supported data formats" href="supportedformats.html" />
     <index label="Installation" href="install.html" />
-    </docs>  
+    <api   label="API Docs" href="api/index.html"/>
+  </docs>  
 
 </site>

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java
(original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java
Fri Jul  8 01:35:15 2011
@@ -39,10 +39,7 @@ public abstract class HCatBaseInputForma
   /**
    * get the schema for the HCatRecord data returned by HCatInputFormat.
    * 
-   * @param job
-   *          the job object
-   * @param hcatSchema
-   *          the schema to use as the consolidated schema
+   * @param context the jobContext
    * @throws IllegalArgumentException
    */
   public static HCatSchema getOutputSchema(JobContext context) throws Exception {

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java
(original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java
Fri Jul  8 01:35:15 2011
@@ -48,10 +48,7 @@ public class HCatEximInputFormat extends
    * the information in the conf object. The inputInfo object is updated with
    * information needed in the client context
    *
-   * @param job
-   *          the job object
-   * @param inputInfo
-   *          the table input info
+   * @param job the job object
    * @return two hcat schemas, for the table columns and the partition keys
    * @throws IOException
    *           the exception in communicating with the metadata server

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
(original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/HCatInputStorageDriver.java
Fri Jul  8 01:35:15 2011
@@ -45,7 +45,7 @@ public abstract class HCatInputStorageDr
 
   /**
    * Returns the InputFormat to use with this Storage Driver.
-   * @param properties the properties containing parameters required for initialization of
InputFormat
+   * @param hcatProperties the properties containing parameters required for initialization
of InputFormat
    * @return the InputFormat instance
    */
   public abstract InputFormat<? extends WritableComparable, ? extends Writable> getInputFormat(Properties
hcatProperties);
@@ -56,7 +56,7 @@ public abstract class HCatInputStorageDr
    * Implementers of StorageDriver should look to overwriting this function so as to convert
their
    * value type to HCatRecord. Default implementation is provided for StorageDriver implementations
    * on top of an underlying InputFormat that already uses HCatRecord as a tuple
-   * @param value the underlying value to convert to HCatRecord
+   * @param baseValue the underlying value to convert to HCatRecord
    */
   public abstract HCatRecord convertToHCatRecord(WritableComparable baseKey, Writable baseValue)
throws IOException;
 
@@ -130,7 +130,6 @@ public abstract class HCatInputStorageDr
    * the schema it has (like Zebra) or it will use this to create a HCatRecord matching the
output schema.
    * @param jobContext the job context object
    * @param hcatSchema the schema published in HCat for this data
-   * @param instantiationState
    * @throws IOException Signals that an I/O exception has occurred.
    */
   public abstract void setOriginalSchema(JobContext jobContext, HCatSchema hcatSchema) throws
IOException;
@@ -149,7 +148,6 @@ public abstract class HCatInputStorageDr
    * driver can add the partition key values to the output HCatRecord if the partition key
values are not present on disk.
    * @param jobContext the job context object
    * @param partitionValues the partition values having a map with partition key name as
key and the HCatKeyValue as value
-   * @param instantiationState
    * @throws IOException Signals that an I/O exception has occurred.
    */
   public abstract void setPartitionValues(JobContext jobContext, Map<String,String>
partitionValues) throws IOException;

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/JobInfo.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/JobInfo.java?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/JobInfo.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/mapreduce/JobInfo.java Fri Jul 
8 01:35:15 2011
@@ -40,7 +40,7 @@ public class JobInfo implements Serializ
 
     /**
      * Instantiates a new hcat job info.
-     * @param tableName the table name
+     * @param hcatTableInfo 
      * @param tableSchema the table schema
      * @param partitions the partitions
      */

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/PigHCatUtil.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/PigHCatUtil.java?rev=1144122&r1=1144121&r2=1144122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/PigHCatUtil.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/pig/PigHCatUtil.java Fri Jul  8
01:35:15 2011
@@ -229,7 +229,7 @@ public class PigHCatUtil {
   }
 
 /**
-   * @param type hcat column type
+   * @param hfs the field schema of the column
    * @return corresponding pig type
    * @throws IOException
    */



Mime
View raw message