pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ga...@apache.org
Subject svn commit: r671269 - in /incubator/pig/branches/types: docs/ src/org/apache/pig/ src/org/apache/pig/backend/datastorage/ src/org/apache/pig/backend/executionengine/ src/org/apache/pig/builtin/ src/org/apache/pig/data/ src/org/apache/pig/impl/ src/org/...
Date Tue, 24 Jun 2008 17:31:02 GMT
Author: gates
Date: Tue Jun 24 10:31:00 2008
New Revision: 671269

URL: http://svn.apache.org/viewvc?rev=671269&view=rev
Log:
Java doc fixes.  Removed most java doc warnings.  Updated overview.html and added several package.html files.


Added:
    incubator/pig/branches/types/src/org/apache/pig/data/package.html
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/package.html
    incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/package.html
    incubator/pig/branches/types/src/org/apache/pig/package.html
Modified:
    incubator/pig/branches/types/docs/overview.html
    incubator/pig/branches/types/src/org/apache/pig/Algebraic.java
    incubator/pig/branches/types/src/org/apache/pig/EvalFunc.java
    incubator/pig/branches/types/src/org/apache/pig/LoadFunc.java
    incubator/pig/branches/types/src/org/apache/pig/PigServer.java
    incubator/pig/branches/types/src/org/apache/pig/backend/datastorage/ElementDescriptor.java
    incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecJob.java
    incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecutionEngine.java
    incubator/pig/branches/types/src/org/apache/pig/builtin/TextLoader.java
    incubator/pig/branches/types/src/org/apache/pig/data/BagFactory.java
    incubator/pig/branches/types/src/org/apache/pig/data/DataBag.java
    incubator/pig/branches/types/src/org/apache/pig/data/DataByteArray.java
    incubator/pig/branches/types/src/org/apache/pig/data/DataType.java
    incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java
    incubator/pig/branches/types/src/org/apache/pig/data/DefaultBagFactory.java
    incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java
    incubator/pig/branches/types/src/org/apache/pig/impl/PigContext.java
    incubator/pig/branches/types/src/org/apache/pig/impl/io/FileLocalizer.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/ExpressionOperator.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOFilter.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOForEach.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOGenerate.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOMapLookup.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOProject.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LORegexp.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSort.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplit.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplitOutput.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOStore.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOUserFunc.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/UnaryExpressionOperator.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
    incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
    incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/MRCompiler.java
    incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/expressionOperators/POProject.java
    incubator/pig/branches/types/src/org/apache/pig/impl/plan/Operator.java
    incubator/pig/branches/types/src/org/apache/pig/impl/plan/OperatorPlan.java
    incubator/pig/branches/types/src/org/apache/pig/impl/plan/PlanVisitor.java
    incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Rule.java
    incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Transformer.java
    incubator/pig/branches/types/src/org/apache/pig/impl/util/JarManager.java
    incubator/pig/branches/types/src/org/apache/pig/impl/util/Spillable.java
    incubator/pig/branches/types/src/org/apache/pig/tools/cmdline/CmdLineParser.java
    incubator/pig/branches/types/src/org/apache/pig/tools/parameters/PreprocessorContext.java

Modified: incubator/pig/branches/types/docs/overview.html
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/docs/overview.html?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/docs/overview.html (original)
+++ incubator/pig/branches/types/docs/overview.html Tue Jun 24 10:31:00 2008
@@ -1,13 +1,45 @@
 <HTML>
 <BODY>
-Provides the classes necessary to create programs in the high-level Pig Latin
-language and the infrastructure for evaluating these programs. 
+Pig is a platform for data flow programming on large data sets in a parallel
+environment.  It consists of a language to specify these programs, 
+<a href="http://wiki.apache.org/pig/PigLatin">Pig Latin</a>,
+a compiler for this language, and an execution engine to execute the programs.
 <p>
-Pig is a platform for analyzing large data sets and consists of 2 layers:
-Pig's infrastructure layer consists of a compiler that produces sequences of 
-Map-Reduce programs, for which large-scale parallel implementations already 
-exist (e.g., the Hadoop project). Pig's language layer currently consists of 
-a textual language called Pig Latin for expressing data analysis programs.
+Pig currently runs on the <a href="http://hadoop.apache.org/core/">hadoop</a>
+platform, reading data from and writing data to hdfs, and doing processing via
+one or more map-reduce jobs.
+
+<h2> Design </h2>
+This section gives a very high level overview of the design of the Pig system.  
+Throughout the documentation you can find the design notes for a package or class by 
+looking for the Design heading.
+
+<h3> Overview </h3>
+<p>
+Pig's design is guided by our <a href="http://incubator.apache.org/pig/philosophy.html">
+pig philosophy</a> and by our experience with similar data processing 
+systems.
+<p>
+Pig shares many similarities with a traditional RDBMS design.  It has a parser,
+type checker, optimizer, and operators that perform the data processing.  However,
+there are some 
+significant differences.  Pig does not have a data catalog, there are no
+transactions, pig does not directly manage data storage, nor does it implement the 
+execution framework.
+<p>
+<h3> High Level Architecture </h3>
+Pig is split between the front and back ends of the engine.  The front end handles
+parsing, checking, and doing initial optimization on a Pig Latin script.  The
+result is a {@link org.apache.pig.impl.logicalLayer.LogicalPlan} that defines how
+the script will be executed.
+<p>
+Once a LogicalPlan has been generated, the backend of Pig handles executing the
+script.  Pig supports multiple different
+backend implementations, in order to allow Pig to run on different systems.  
+Currently pig comes with two backends, Map-Reduce and local.  For a given run,
+pig selects the backend to use via configuration.
+
+
 </BODY>
 </HTML>
 

Modified: incubator/pig/branches/types/src/org/apache/pig/Algebraic.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/Algebraic.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/Algebraic.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/Algebraic.java Tue Jun 24 10:31:00 2008
@@ -18,7 +18,9 @@
 package org.apache.pig;
 
 /**
- * Suppose we have to compute an function f over a bag X. In general, we need to know the entire X
+ * An interface to declare that an EvalFunc's 
+ * calculation can be decomposed into initial, intermediate, and final steps.
+ * More formally, suppose we have to compute a function f over a bag X. In general, we need to know the entire X
  * before we can make any progress on f. However, some functions are <i>algebraic</i> e.g. SUM. In
  * these cases, you can apply some initital function f_init on subsets of X to get partial results. 
  * You can then combine partial results from different subsets of X using an intermediate function

Modified: incubator/pig/branches/types/src/org/apache/pig/EvalFunc.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/EvalFunc.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/EvalFunc.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/EvalFunc.java Tue Jun 24 10:31:00 2008
@@ -139,8 +139,9 @@
     
     /**
      * This function should be overriden to return true for functions that return their values
-     * asynchronously. 
-     * @return
+     * asynchronously.  Currently pig never attempts to execute a function
+     * asynchronously.
+     * @return true if the function can be executed asynchronously.
      */
     public boolean isAsynchronous(){
         return false;

Modified: incubator/pig/branches/types/src/org/apache/pig/LoadFunc.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/LoadFunc.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/LoadFunc.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/LoadFunc.java Tue Jun 24 10:31:00 2008
@@ -74,7 +74,7 @@
     
     /**
      * Cast data from bytes to integer value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return Integer value.
      * @throws IOException if the value cannot be cast.
      */
@@ -82,7 +82,7 @@
 
     /**
      * Cast data from bytes to long value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return Long value.
      * @throws IOException if the value cannot be cast.
      */
@@ -90,7 +90,7 @@
 
     /**
      * Cast data from bytes to float value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return Float value.
      * @throws IOException if the value cannot be cast.
      */
@@ -98,7 +98,7 @@
 
     /**
      * Cast data from bytes to double value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return Double value.
      * @throws IOException if the value cannot be cast.
      */
@@ -106,7 +106,7 @@
 
     /**
      * Cast data from bytes to chararray value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return String value.
      * @throws IOException if the value cannot be cast.
      */
@@ -114,7 +114,7 @@
 
     /**
      * Cast data from bytes to map value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return Map value.
      * @throws IOException if the value cannot be cast.
      */
@@ -122,7 +122,7 @@
 
     /**
      * Cast data from bytes to tuple value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return Tuple value.
      * @throws IOException if the value cannot be cast.
      */
@@ -130,7 +130,7 @@
 
     /**
      * Cast data from bytes to bag value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return Bag value.
      * @throws IOException if the value cannot be cast.
      */

Modified: incubator/pig/branches/types/src/org/apache/pig/PigServer.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/PigServer.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/PigServer.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/PigServer.java Tue Jun 24 10:31:00 2008
@@ -139,7 +139,7 @@
      * is useful for functions that require arguments to the 
      * constructor.
      * 
-     * @param aliases - the new function alias to define.
+     * @param function - the new function alias to define.
      * @param functionSpec - the name of the function and any arguments.
      * It should have the form: classname('arg1', 'arg2', ...)
      */
@@ -208,7 +208,6 @@
      * 
      * @param query
      *            a Pig Latin expression to be evaluated.
-     * @return a handle to the query.
      * @throws IOException
      */
     public void registerQuery(String query) throws IOException {
@@ -271,8 +270,8 @@
     
     /**
      * Store an alias into a file
-     * @param id: The alias to store
-     * @param filename: The file to which to store to
+     * @param id The alias to store
+     * @param filename The file in which to store the alias
      * @throws IOException
      */
 
@@ -355,7 +354,7 @@
      * to file. Thus if you are using this to determine if you data set will fit
      * in the HDFS, you need to divide the result of this call by your specific replication
      * setting. 
-     * @return
+     * @return unused byte capacity of the file system.
      * @throws IOException
      */
     public long capacity() throws IOException {
@@ -384,7 +383,7 @@
     /**
      * Returns the length of a file in bytes which exists in the HDFS (accounts for replication).
      * @param filename
-     * @return
+     * @return length of the file in bytes
      * @throws IOException
      */
     public long fileSize(String filename) throws IOException {

Modified: incubator/pig/branches/types/src/org/apache/pig/backend/datastorage/ElementDescriptor.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/backend/datastorage/ElementDescriptor.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/backend/datastorage/ElementDescriptor.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/backend/datastorage/ElementDescriptor.java Tue Jun 24 10:31:00 2008
@@ -106,7 +106,6 @@
         /**
          * Checks whether the entity exists or not
          * 
-         * @param name of entity
          * @return true if entity exists, false otherwise.
          */
         public boolean exists() throws IOException;

Modified: incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecJob.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecJob.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecJob.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecJob.java Tue Jun 24 10:31:00 2008
@@ -50,7 +50,7 @@
      * true is the physical plan has executed successfully and results are ready
      * to be retrieved
      * 
-     * @return
+     * @return true if execution has completed, false otherwise.
      * @throws ExecException
      */
     public boolean hasCompleted() throws ExecException;
@@ -59,7 +59,7 @@
      * if query has executed successfully we want to retrieve the results
      * via iterating over them. 
      * 
-     * @return
+     * @return iterator for resulting tuples
      * @throws ExecException
      */
     public Iterator<Tuple> getResults() throws ExecException;
@@ -67,7 +67,7 @@
     /**
      * Get configuration information
      * 
-     * @return
+     * @return configuration information for the execution engine
      */    
     public Properties getContiguration();
 
@@ -76,7 +76,7 @@
      * has not been called yet; not running, e.g. execute has been issued, 
      * but job is waiting; running...; completed; aborted...; progress information
      * 
-     * @return
+     * @return statistics relevant to the execution engine
      */
     public Map<String, Object> getStatistics();
 

Modified: incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecutionEngine.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecutionEngine.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecutionEngine.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/backend/executionengine/ExecutionEngine.java Tue Jun 24 10:31:00 2008
@@ -86,7 +86,7 @@
      * parallelism for this plan, which could be different from the "default"
      * one set at the execution engine level.
      * 
-     * @param logical plan
+     * @param plan logical plan to compile
      * @param properties
      * @return physical plan
      */
@@ -129,7 +129,7 @@
     /**
      * Return currently running jobs (can be useful for admin purposes)
      * 
-     * @return
+     * @return All jobs that are currently active in the execution engine.
      * @throws ExecException
      */
     public Collection<ExecJob> runningJobs(Properties properties) throws ExecException;
@@ -137,7 +137,8 @@
     /**
      * List scopes that are active in the back-end
      * 
-     * @return
+     * @return All scopes that are currently active in the execution
+     * engine.
      * @throws ExecException
      */
     public Collection<String> activeScopes() throws ExecException;

Modified: incubator/pig/branches/types/src/org/apache/pig/builtin/TextLoader.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/builtin/TextLoader.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/builtin/TextLoader.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/builtin/TextLoader.java Tue Jun 24 10:31:00 2008
@@ -103,7 +103,7 @@
 
     /**
      * Cast data from bytes to chararray value.  
-     * @param bytes byte array to be cast.
+     * @param b byte array to be cast.
      * @return String value.
      * @throws IOException if the value cannot be cast.
      */

Modified: incubator/pig/branches/types/src/org/apache/pig/data/BagFactory.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/BagFactory.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/BagFactory.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/BagFactory.java Tue Jun 24 10:31:00 2008
@@ -26,15 +26,15 @@
 import org.apache.pig.impl.util.SpillableMemoryManager;
 
 /**
- * A bag factory.  Can be used to generate different types of bags
- * depending on what is needed.  This class is abstract so that users can
+ * Factory for constructing different types of bags.
+ * This class is abstract so that users can
  * override the bag factory if they desire to provide their own that
  * returns their implementation of a bag.  If the property
  * pig.data.bag.factory.name is set to a class name and
  * pig.data.bag.factory.jar is set to a URL pointing to a jar that
  * contains the above named class, then getInstance() will create a
  * a instance of the named class using the indicatd jar.  Otherwise, it
- * will create and instance of DefaultBagFactory.
+ * will create an instance of DefaultBagFactory.
  */
 public abstract class BagFactory {
     private static BagFactory gSelf = null;

Modified: incubator/pig/branches/types/src/org/apache/pig/data/DataBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DataBag.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DataBag.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DataBag.java Tue Jun 24 10:31:00 2008
@@ -64,7 +64,7 @@
  * return to this issue, as synchronizing reads will most likely defeat the
  * purpose of multi-threading execution.
  *
- * DataBag come in several types, default, sorted, and distinct.  The type
+ * DataBags come in several types, default, sorted, and distinct.  The type
  * must be chosen up front, there is no way to convert a bag on the fly.
  */
 public interface DataBag extends Spillable, WritableComparable, Iterable<Tuple>, Serializable {

Modified: incubator/pig/branches/types/src/org/apache/pig/data/DataByteArray.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DataByteArray.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DataByteArray.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DataByteArray.java Tue Jun 24 10:31:00 2008
@@ -51,7 +51,7 @@
      * @param b byte array to read from.  A copy of the underlying bytes will be
      * made.
      * @param start starting point to copy from
-     * @param ending point to copy to, exclusive.
+     * @param end ending point to copy to, exclusive.
      */
     public DataByteArray(byte[] b, int start, int end) {
         mData = new byte[end - start];

Modified: incubator/pig/branches/types/src/org/apache/pig/data/DataType.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DataType.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DataType.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DataType.java Tue Jun 24 10:31:00 2008
@@ -34,9 +34,11 @@
 import org.apache.pig.backend.executionengine.ExecException;
 
 /**
- * A class of static final values used to encode data type.  This could be
- * done as an enumeration, but it done as byte codes instead to save
- * creating objects.  A few utility functions are also included.
+ * A class of static final values used to encode data type and a number of
+ * static helper functions for manipulating data objects.  The data type
+ * values could be
+ * done as an enumeration, but it is done as byte codes instead to save
+ * creating objects.
  */
 public class DataType {
     // IMPORTANT! This list can be used to record values of data on disk,
@@ -454,18 +456,18 @@
     }
 
     /**
-     * Determine whether the this data type has a schema.
+     * Determine whether this object can have a schema.
      * @param o Object to determine if it has a schema
-     * @return true if the type can have a alid schema (i.e., bag or tuple)
+     * @return true if the type can have a valid schema (i.e., bag or tuple)
      */
     public static boolean isSchemaType(Object o) {
         return isSchemaType(findType(o));
     }
 
     /**
-     * Determine whether the this data type has a schema.
-     * @param o Object to determine if it has a schema
-     * @return true if the type can have a alid schema (i.e., bag or tuple)
+     * Determine whether this data type can have a schema.
+     * @param dataType dataType to determine if it has a schema
+     * @return true if the type can have a valid schema (i.e., bag or tuple)
      */
     public static boolean isSchemaType(byte dataType) {
         return ((dataType == BAG) || (dataType == TUPLE)); 

Modified: incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DefaultAbstractBag.java Tue Jun 24 10:31:00 2008
@@ -33,43 +33,8 @@
 import org.apache.pig.impl.util.Spillable;
 
 /**
- * A collection of Tuples.  A DataBag may or may not fit into memory.
- * DataBag extends spillable, which means that it registers with a memory
- * manager.  By default, it attempts to keep all of its contents in memory.
- * If it is asked by the memory manager to spill to disk (by a call to
- * spill()), it takes whatever it has in memory, opens a spill file, and
- * writes the contents out.  This may happen multiple times.  The bag
- * tracks all of the files it's spilled to.
- * 
- * DataBag provides an Iterator interface, that allows callers to read
- * through the contents.  The iterators are aware of the data spilling.
- * They have to be able to handle reading from files, as well as the fact
- * that data they were reading from memory may have been spilled to disk
- * underneath them.
- *
- * The DataBag interface assumes that all data is written before any is
- * read.  That is, a DataBag cannot be used as a queue.  If data is written
- * after data is read, the results are undefined.  This condition is not
- * checked on each add or read, for reasons of speed.  Caveat emptor.
- *
- * Since spills are asynchronous (the memory manager requesting a spill
- * runs in a separate thread), all operations dealing with the mContents
- * Collection (which is the collection of tuples contained in the bag) have
- * to be synchronized.  This means that reading from a DataBag is currently
- * serialized.  This is ok for the moment because pig execution is
- * currently single threaded.  A ReadWriteLock was experimented with, but
- * it was found to be about 10x slower than using the synchronize keyword.
- * If pig changes its execution model to be multithreaded, we may need to
- * return to this issue, as synchronizing reads will most likely defeat the
- * purpose of multi-threading execution.
- *
- * DataBag come in several types, default, sorted, and distinct.  The type
- * must be chosen up front, there is no way to convert a bag on the fly.
- * 
- * This is the default implementation.  Users are free to provide their
- * own implementation, but they should keep in mind the need to support
- * bags that do not fit in memory, and handle spilling in an efficient
- * manner.
+ * Default implementation of DataBag.  This is an abstract class used as a
+ * parent for all three of the types of data bags.
  */
 public abstract class DefaultAbstractBag implements DataBag {
     // Container that holds the tuples. Actual object instantiated by

Modified: incubator/pig/branches/types/src/org/apache/pig/data/DefaultBagFactory.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DefaultBagFactory.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DefaultBagFactory.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DefaultBagFactory.java Tue Jun 24 10:31:00 2008
@@ -22,8 +22,7 @@
 import org.apache.pig.impl.util.SpillableMemoryManager;
 
 /**
- * A bag factory.  Can be used to generate different types of bags
- * depending on what is needed.
+ * Default implementation of BagFactory.
  */
 public class DefaultBagFactory extends BagFactory {
     /**

Modified: incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/data/DefaultTupleFactory.java Tue Jun 24 10:31:00 2008
@@ -23,8 +23,7 @@
 import org.apache.pig.backend.executionengine.ExecException;
 
 /**
- * A bag factory.  Can be used to generate different types of bags
- * depending on what is needed.
+ * Default implementation of TupleFactory.
  */
 public class DefaultTupleFactory extends TupleFactory {
     public Tuple newTuple() {

Added: incubator/pig/branches/types/src/org/apache/pig/data/package.html
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/data/package.html?rev=671269&view=auto
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/data/package.html (added)
+++ incubator/pig/branches/types/src/org/apache/pig/data/package.html Tue Jun 24 10:31:00 2008
@@ -0,0 +1,51 @@
+<html>
+<body>
+
+<p>
+Data types for Pig.
+<p>
+This package contains implementations of Pig specific data types as well as
+support functions for reading, writing, and using all Pig data types.
+<p>
+Whenever possible, Pig utilizes Java provided data types.  These include
+Integer, Long, Float, Double, Boolean, String, and Map.  Tuple, Bag, and
+DataByteArray are implemented in this package.  
+
+<h2> Design </h2>
+<p>
+The choice was made to utilize Java provided types for two main reasons.  One,
+it minimizes the burden on UDF developers, as they will have full access to
+these types with no need to convert to and from Pig specific types.  Two,
+maintenance costs will be lower as there is no need to implement and maintain
+Pig specific data classes.  The drawback is that the only common parent of all
+these types is Object.  Thus Pig is often required to treat its data objects
+as Objects and then implement static methods to manipulate these Objects,
+rather than being able to define a PigDatum class with common functions.
+<p>
+Three data types were implemented as Pig specific classes: 
+{@link org.apache.pig.data.DataByteArray}, {@link org.apache.pig.data.Tuple},
+and {@link org.apache.pig.data.DataBag}.  
+<p>
+DataByteArray represents an array of bytes, with no interpretation of those
+bytes provided or assumed.  This could have been represented as byte[], but a
+separate class was constructed to provide common functions needed to
+manipulate these objects.
+<p>
+Tuple represents an ordered collection of data elements.  Every field in a
+tuple can contain any Pig data type.  Tuple is presented as an interface to
+allow differing implementations in cases where users have unique
+representations of their data that they wish to preserve in their in memory
+representations.  The {@link org.apache.pig.data.TupleFactory} is an
+abstract class, to enable a user who has defined his own tuples to provide a
+factory that creates those tuples.  Default implementations of Tuple and
+TupleFactory are provided and used by default. 
+<p>
+DataBag represents a collection of Tuples.  DataBags can be of default type
+(no extra features), sorted (tuples are sorted according to a provided
+comparator function), or distinct (no duplicate tuples).  As with Tuple,
+DataBag is presented as an interface, and 
+{@link org.apache.pig.data.BagFactory} is an abstract class.  Default implementations of DataBag,
+BagFactory, and all three types of bags are provided.
+
+</body>
+</html>

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/PigContext.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/PigContext.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/PigContext.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/PigContext.java Tue Jun 24 10:31:00 2008
@@ -300,7 +300,7 @@
      * is useful for functions that require arguments to the 
      * constructor.
      * 
-     * @param aliases - the new function alias to define.
+     * @param function - the new function alias to define.
      * @param functionSpec - the name of the function and any arguments.
      * It should have the form: classname('arg1', 'arg2', ...)
      */
@@ -315,7 +315,7 @@
     /**
      * Returns the type of execution currently in effect.
      * 
-     * @return
+     * @return current execution type
      */
     public ExecType getExecType() {
         return execType;

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/io/FileLocalizer.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/io/FileLocalizer.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/io/FileLocalizer.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/io/FileLocalizer.java Tue Jun 24 10:31:00 2008
@@ -138,7 +138,7 @@
     /**
      * This function is meant to be used if the mappers/reducers want to access any HDFS file
      * @param fileName
-     * @return
+     * @return InputStream of the open file.
      * @throws IOException
      */
     

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/ExpressionOperator.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/ExpressionOperator.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/ExpressionOperator.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/ExpressionOperator.java Tue Jun 24 10:31:00 2008
@@ -77,9 +77,9 @@
      * Set the output schema for this operator. If a schema already exists, an
      * attempt will be made to reconcile it with this new schema.
      * 
-     * @param schema
-     *            Schema to set.
-     * @throws ParseException
+     * @param fs
+     *            FieldSchema to set.
+     * @throws FrontendException
      *             if there is already a schema and the existing schema cannot
      *             be reconciled with this new schema.
      */

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOCogroup.java Tue Jun 24 10:31:00 2008
@@ -52,9 +52,11 @@
      * 
      * @param plan
      *            LogicalPlan this operator is a part of.
-     * @param key
+     * @param k
      *            OperatorKey for this operator
-     * @param groupByCols
+     * @param inputs
+     *            List of input operators
+     * @param groupByPlans
      *            the group by columns
      */
     public LOCogroup(LogicalPlan plan, OperatorKey k,

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOFilter.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOFilter.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOFilter.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOFilter.java Tue Jun 24 10:31:00 2008
@@ -41,7 +41,7 @@
      *            Logical plan this operator is a part of.
      * @param k
      *            Operator key to assign to this node.
-     * @param cond
+     * @param comparisonPlan
      *            the filter condition
      * @param input
      *            the input that needs filtering

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOForEach.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOForEach.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOForEach.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOForEach.java Tue Jun 24 10:31:00 2008
@@ -47,7 +47,7 @@
      *            Logical plan this operator is a part of.
      * @param k
      *            Operator key to assign to this node.
-     * @param operators
+     * @param foreachPlan
      *            the list of operators that are applied for each input
      */
 

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOGenerate.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOGenerate.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOGenerate.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOGenerate.java Tue Jun 24 10:31:00 2008
@@ -48,10 +48,12 @@
      * 
      * @param plan
      *            Logical plan this operator is a part of.
-     * @param k
+     * @param key
      *            Operator key to assign to this node.
-     * @param projections
-     *            the projection list of the generate
+     * @param generatePlans
+     *            Plans for each projection element
+     * @param flatten
+     *            Whether to flatten each projection element
      */
 
     public LOGenerate(LogicalPlan plan, OperatorKey key,
@@ -65,10 +67,12 @@
      * 
      * @param plan
      *            Logical plan this operator is a part of.
-     * @param k
+     * @param key
      *            Operator key to assign to this node.
-     * @param projection
+     * @param generatePlan
      *            the projection of the generate
+     * @param flatten
+     *            whether the result needs to be flattened
      */
 
     public LOGenerate(LogicalPlan plan, OperatorKey key,

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOMapLookup.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOMapLookup.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOMapLookup.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOMapLookup.java Tue Jun 24 10:31:00 2008
@@ -46,7 +46,7 @@
      * 
      * @param plan
      *            Logical plan this operator is a part of.
-     * @param k
+     * @param key
      *            Operator key to assign to this node.
      * @param map
      *            the map expression

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOProject.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOProject.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOProject.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOProject.java Tue Jun 24 10:31:00 2008
@@ -69,7 +69,7 @@
      * 
      * @param plan
      *            Logical plan this operator is a part of.
-     * @param k
+     * @param key
      *            Operator key to assign to this node.
      * @param exp
      *            the expression which might contain the column to project
@@ -92,7 +92,7 @@
      * 
      * @param plan
      *            Logical plan this operator is a part of.
-     * @param k
+     * @param key
      *            Operator key to assign to this node.
      * @param exp
      *            the expression which might contain the column to project

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LORegexp.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LORegexp.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LORegexp.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LORegexp.java Tue Jun 24 10:31:00 2008
@@ -41,12 +41,12 @@
      * 
      * @param plan
      *            Logical plan this operator is a part of.
-     * @param k
+     * @param key
      *            Operator key to assign to this node.
-     * @param exp
-     *            the expression which might contain the column to project
-     * @param projection
-     *            the list of columns to project
+     * @param operand
+     *            input expression to be tested against
+     * @param regexp
+     *            regular expression to match
      */
     public LORegexp(LogicalPlan plan, OperatorKey key,
             ExpressionOperator operand, String regexp) {

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSort.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSort.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSort.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSort.java Tue Jun 24 10:31:00 2008
@@ -48,17 +48,22 @@
      *            OperatorKey for this operator
      * @param input
      *            Input to sort
-     * @param sortCols
+     * @param sortColPlans
      *            Array of column numbers that will be used for sorting data.
      * @param ascCols
      *            Array of booleans. Should be same size as sortColPlans. True
      *            indicates sort ascending (default), false sort descending. If
      *            this array is null, then all columns will be sorted ascending.
-     * @param sorFunc
+     * @param sortFunc
      *            the user defined sorting function
      */
-    public LOSort(LogicalPlan plan, OperatorKey key, LogicalOperator input,
-            List<LogicalPlan> sortColPlans, List<Boolean> ascCols, String sortFunc) {
+    public LOSort(
+            LogicalPlan plan,
+            OperatorKey key,
+            LogicalOperator input,
+            List<LogicalPlan> sortColPlans,
+            List<Boolean> ascCols,
+            String sortFunc) {
         super(plan, key);
         mInput = input;
         mSortColPlans = sortColPlans;

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplit.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplit.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplit.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplit.java Tue Jun 24 10:31:00 2008
@@ -35,7 +35,6 @@
 public class LOSplit extends LogicalOperator {
     private static final long serialVersionUID = 2L;
 
-    //private Map<String, LogicalPlan> mCondPlans;
     private ArrayList<LogicalOperator> mOutputs;
     private static Log log = LogFactory.getLog(LOSplit.class);
 
@@ -46,33 +45,17 @@
      *            OperatorKey for this operator
      * @param outputs
      *            list of aliases that are the output of the split
-     * @param conditions
-     *            list of conditions for the split
      */
     public LOSplit(LogicalPlan plan, OperatorKey key,
             ArrayList<LogicalOperator> outputs) {
-           // Map<String, LogicalPlan> condPlans) {
         super(plan, key);
         mOutputs = outputs;
-        //mCondPlans = condPlans;
     }
 
     public List<LogicalOperator> getOutputs() {
         return mOutputs;
     }
-/*
-    public Collection<LogicalPlan> getConditionPlans() {
-        return mCondPlans.values();
-    }
-
-    public Set<String> getOutputAliases() {
-        return mCondPlans.keySet();
-    }
 
-    public void addOutputAlias(String output, LogicalPlan cond) {
-        mCondPlans.put(output, cond);
-    }
-*/
     public void addOutput(LogicalOperator lOp) {
         mOutputs.add(lOp);
     }

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplitOutput.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplitOutput.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplitOutput.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOSplitOutput.java Tue Jun 24 10:31:00 2008
@@ -41,12 +41,16 @@
      *            LogicalPlan this operator is a part of.
      * @param key
      *            OperatorKey for this operator
-     * @param outputs
-     *            list of aliases that are the output of the split
-     * @param conditions
-     *            list of conditions for the split
+     * @param index
+     *            index of this output in the split
+     * @param condPlan
+     *            logical plan containing the condition for this split output
      */
-    public LOSplitOutput(LogicalPlan plan, OperatorKey key, int index, LogicalPlan condPlan) {
+    public LOSplitOutput(
+            LogicalPlan plan,
+            OperatorKey key,
+            int index,
+            LogicalPlan condPlan) {
         super(plan, key);
         this.mIndex = index;
         this.mCondPlan = condPlan;

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOStore.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOStore.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOStore.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOStore.java Tue Jun 24 10:31:00 2008
@@ -47,8 +47,6 @@
      *            OperatorKey for this operator
      * @param outputFileSpec
      *            the file to be stored
-     * @param storeFunc
-     *            the store function, pre-defined or user defined
      */
     public LOStore(LogicalPlan plan, OperatorKey key,
             FileSpec outputFileSpec) throws IOException {

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOUserFunc.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOUserFunc.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOUserFunc.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LOUserFunc.java Tue Jun 24 10:31:00 2008
@@ -35,7 +35,7 @@
      *            LogicalPlan this operator is a part of.
      * @param k
      *            OperatorKey for this operator.
-     * @param funcName
+     * @param funcSpec
      *            name of the user defined function.
      * @param args
      *            List of expressions that form the arguments for this function.

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java Tue Jun 24 10:31:00 2008
@@ -93,10 +93,8 @@
     /**
      * @param plan
      *            Logical plan this operator is a part of.
-     * @param -
-     *            k Operator key to assign to this node.
-     * @param =
-     *            rp degree of requested parallelism with which to execute this
+     * @param k Operator key to assign to this node.
+     * @param rp degree of requested parallelism with which to execute this
      *            node.
      */
     public LogicalOperator(LogicalPlan plan, OperatorKey k, int rp) {
@@ -159,7 +157,7 @@
      * Set the type of this operator. This should only be called by the type
      * checking routines.
      * 
-     * @param type -
+     * @param t 
      *            Type to set this operator to.
      */
     final public void setType(byte t) {

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/UnaryExpressionOperator.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/UnaryExpressionOperator.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/UnaryExpressionOperator.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/UnaryExpressionOperator.java Tue Jun 24 10:31:00 2008
@@ -45,8 +45,6 @@
      *            node.
      * @param operand
      *            ExpressionOperator the left hand side operand
-     * @param operator
-     *            LogicalExperssion the actual operator
      */
     public UnaryExpressionOperator(LogicalPlan plan, OperatorKey k, int rp,
             ExpressionOperator operand) {
@@ -61,8 +59,6 @@
      *            Operator key to assign to this node.
      * @param operand
      *            ExpressionOperator the left hand side operand
-     * @param operator
-     *            LogicalExperssion the actual operator
      */
     public UnaryExpressionOperator(LogicalPlan plan, OperatorKey k,
             ExpressionOperator operand) {

Added: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/package.html
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/package.html?rev=671269&view=auto
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/package.html (added)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/package.html Tue Jun 24 10:31:00 2008
@@ -0,0 +1,41 @@
+<html>
+<body>
+
+<p>
+The logical operators that represent a pig script and tools for manipulating
+those operators.  The logical layer contains the logical operators themselves,
+as well as validators that check the logical plan, an optimizer, and a general
+visitor utility for working with the logical plans.
+
+<h2> Design </h2>
+<p>
+Logical operators use the operator, plan, visitor, and optimizer framework
+provided by the {@link org.apache.pig.impl.plan} package.
+<p>
+Logical operators consist of both relational and expression operators.
+Relational operators work on an entire bag.  Expression operators work on an
+element of a tuple (which may also be a bag).  Due to Pig's nested data and
+execution model the distinction between relational and expression operators is
+not always clear.  And some operators such as LOProject function as both.
+<p>
+In a traditional database system, a query execution plan is constructed from
+relational operators, such as project, filter, sort, aggregate, join.  Each of
+these may contain an expression tree, made up of expression operators.  For
+example, consider a SQL query <code>select a from T where a = 5;</code>.  The
+where clause would be represented by a filter operator with an expression tree
+for <code>a=5</code>.
+<p>
+Pig takes a similar approach, except that the operators contained inside of a
+relational operator may also be relational.  For example, a foreach statement
+that has a nested script, such as <code>foreach B { C = distinct $1; generate
+group, COUNT(C);}</code>.  This foreach needs to contain not just an
+expression tree but the distinct relational operator.  For this reason, Pig's
+relational operators do not contain expression trees.  Instead they contain
+one or more LogicalPlans themselves.  This allows Pig to arbitrarily nest
+the logical plan.  In this sense Pig is more similar to a traditional
+procedural language where certain statements (e.g. if, while) can contain any
+other statement in the language rather than being like SQL where the statement
+execution tends to be more linear.
+
+</body>
+</html>

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/schema/Schema.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/schema/Schema.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/schema/Schema.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/schema/Schema.java Tue Jun 24 10:31:00 2008
@@ -136,11 +136,13 @@
         /**
          * Recursively compare two schemas to check if the input schema 
          * can be cast to the cast schema
-         * @param cast schema of the cast operator
-         * @param  input schema of the cast input
+         * @param castFs schema of the cast operator
+         * @param  inputFs schema of the cast input
          * @return true if the input schema can be cast to the cast schema, false otherwise
          */
-        public static boolean castable(Schema.FieldSchema castFs, Schema.FieldSchema inputFs) {
+        public static boolean castable(
+                Schema.FieldSchema castFs,
+                Schema.FieldSchema inputFs) {
             if(castFs == null && inputFs == null) {
                 return false;
             }
@@ -204,7 +206,7 @@
          * @param fother
          * @param relaxInner If true, we don't check inner tuple schemas
          * @param relaxAlias If true, we don't check aliases
-         * @return
+         * @return true if FieldSchemas are equal, false otherwise
          */
         public static boolean equals(FieldSchema fschema,
                                      FieldSchema fother,
@@ -619,7 +621,8 @@
      * @param schema
      * @param other
      * @param relaxInner if true, inner schemas will not be checked
-     * @return
+     * @param relaxAlias if true, aliases will not be checked
+     * @return true if schemas are equal, false otherwise
      */
     public static boolean equals(Schema schema,
                                  Schema other,

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java Tue Jun 24 10:31:00 2008
@@ -1818,11 +1818,11 @@
         }
     }
 
-        /**
+    /**
      * This can be used to get the merged type of output group col
      * only when the group col is of atomic type
      * TODO: This doesn't work with group by complex type
-     * @return
+     * @return The type of the group by
      */
     public byte getAtomicGroupByType(LOCogroup cg) throws VisitorException {
         if (cg.isTupleGroupCol()) {

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/MRCompiler.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/MRCompiler.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/MRCompiler.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/mapReduceLayer/MRCompiler.java Tue Jun 24 10:31:00 2008
@@ -152,7 +152,7 @@
     
     /**
      * Used to get the compiled plan
-     * @return
+     * @return map reduce plan built by the compiler
      */
     public MROperPlan getMRPlan() {
         return MRPlan;
@@ -160,7 +160,7 @@
     
     /**
      * Used to get the plan that was compiled
-     * @return
+     * @return physical plan
      */
     public PhysicalPlan<PhysicalOperator> getPlan() {
         return plan;
@@ -170,7 +170,7 @@
      * The front-end method that the user calls to compile
      * the plan. Assumes that all submitted plans have a Store
      * operators as the leaf.
-     * @return
+     * @return A map reduce plan
      * @throws IOException
      * @throws PlanException
      * @throws VisitorException

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/expressionOperators/POProject.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/expressionOperators/POProject.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/expressionOperators/POProject.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/expressionOperators/POProject.java Tue Jun 24 10:31:00 2008
@@ -111,7 +111,7 @@
     /**
      * Fetches the input tuple and returns the requested
      * column
-     * @return
+     * @return next value.
      * @throws ExecException
      */
     public Result getNext() throws ExecException{

Added: incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/package.html
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/package.html?rev=671269&view=auto
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/package.html (added)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/physicalLayer/package.html Tue Jun 24 10:31:00 2008
@@ -0,0 +1,106 @@
+<html>
+<body>
+
+<p>
+Implementation of physical operators that use hadoop as the execution engine
+and data storage.
+
+<h2> Design </h2>
+<p>
+Physical operators use the operator, plan, visitor, and optimizer framework
+provided by the {@link org.apache.pig.impl.plan} package.
+<p>
+As with {@link org.apache.pig.impl.logicalLayer}, physical operators consist
+of {@link org.apache.pig.impl.physicalLayer.relationalOperators} and
+{@link org.apache.pig.impl.physicalLayer.expressionOperators}.  In many data
+processing systems relational operators and expression operators are modeled
+as different entities because they behave differently.  Pig blurs, though does
+not entirely remove, this distinction because of its support for nested
+operations.
+<p>
+Conceptually, relational operators work on an entire relation (in Pig's case,
+a bag).  In terms of implementation, they operate on one record (tuple) at a
+time.  This avoids needing to load the entire relation into memory before
+operating on it.
+<p>
+Expression operators, on the other hand, operate on the assumption that they
+are provided their entire input at invocation time and provide their entire
+output when they are finished.
+<p>
+Pig's hadoop implementation implements a pull based model, where each operator
+calls getNext() on the operator before it in the plan.  getNext() is
+implemented for each of the different data types, so that operators can
+request the data type they expect.  Relational operators will always expect a
+tuple.  Expression operators can request any data type.
+<p>
+As with the logical plan, physical relational operators often have embedded
+physical plans.  When a relational operator calls getNext() on its predecessor
+and receives a tuple, it will attach that tuple to its embedded physical plan(s)
+and then call getNext() on the root node(s) of those plan(s) in order to get the
+output.  For example, the Pig Latin <code>filter A by $0 != 5</code> will
+produce a POFilter object, with an embedded physical plan that consists of
+POProject(0), POConst(5), both attached to PONotEqual.  Each time
+POFilter.getNext() is called, it will call its predecessor's getNext() method,
+and then attach the input to POProject and POConst.  It will then call
+PONotEqual.getNext().  PONotEqual will in turn call POProject.getNext() and
+POConst.getNext(), and then evaluate and return the results.  If the result is
+true, POFilter will return its input tuple.
+If the answer is false, it will call its predecessor's getNext() method and
+try again.
+<p>
+Given Pig's nested data and execution models, there are places it is necessary
+to move between relational and expression operators.  Consider the following
+Pig Latin script:
+<pre>
+A = load 'myfile';
+B = group A by $0;
+C = foreach B {
+    C1 = filter $1 by $0 &gt; 0; 
+    C2 = distinct C1;
+    generate group, COUNT(C2), SUM(C1.$0);
+}
+</pre>
+In particular, the foreach section presents some interesting challenges.
+<p>
+First, foreach has three separate outputs, all of which require separate but
+parallel executions.  To address this, each element of the foreach is described by a
+separate embedded plan.  This can cause duplication of
+operations, as in this plan.
+In this case splitting the plans for COUNT and SUM causes a double execution of
+the <code>C1 = filter</code> section of the script.  But it avoids needing to
+place a split operator between filter -&gt; distinct and filter -&gt; SUM.
+<p>
+The second issue presented by the nested logic is that the
+foreach operator is going to receive a tuple with the format ($0, bag), where
+bag is a collection of all the tuples with a given value for $0.  It will then
+attach that to the filter.  But filter does not expect a bag.  It expects
+to get tuples.  On the other end, distinct will be outputting tuples.  But
+COUNT() expects C2 to be a bag that can be processed by COUNT as a whole.
+<p>
+To address this issue, some operators have been modified to provide
+"bookend" functionality.  That is, the ability to translate between relational
+and expression operators.
+The embedded plan for calculating the COUNT in the foreach will look like:
+POProject(1) -&gt; POFilter -&gt; PODistinct -&gt; POProject(*) -&gt; COUNT().
+The first POProject(1) will have a bag attached as its input by POForeach.
+But POFilter will call getNext(Tuple).  In this case, POProject will know to
+open the bag and provide the tuples one at a time, until the bag is empty, at
+which point it will return STATUS_EOP.  The PODistinct will be expecting to
+return tuples, but POProject(*) will call getNext(bag).  In this case all
+relational operators will be able to accumulate all of the tuples by calling
+getNext(tuple) on themselves until they see STATUS_EOP, packaging those tuples
+into a bag, and then returning that bag.
+<p>
+And third, project is being subtly overloaded here.  In cases where the script
+says <code>C = foreach B generate $1</code>, this type of projection means take the second
+element from the tuple and project it.  But in cases like <code>C = foreach B
+generate SUM($1.$0)</code> and $1 is a bag, this type of projection expects to
+receive a bag ($1) and output a modified bag ($1 with only the first field,
+$0, remaining in all the tuples in the bag).  To handle this issue, when
+POProject sees that its predecessor is a POProject and its successor is an
+expression operator, it will perform a projection on the bag (that is,
+perform the specified project on each tuple in the bag) rather than on a
+tuple.
+
+</body>
+</html>

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/plan/Operator.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/plan/Operator.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/plan/Operator.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/plan/Operator.java Tue Jun 24 10:31:00 2008
@@ -38,8 +38,7 @@
     protected OperatorKey mKey;
 
     /**
-     * @param -
-     *            k Operator key to assign to this node.
+     * @param k Operator key to assign to this node.
      */
     public Operator(OperatorKey k) {
         mKey = k;

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/plan/OperatorPlan.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/plan/OperatorPlan.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/plan/OperatorPlan.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/plan/OperatorPlan.java Tue Jun 24 10:31:00 2008
@@ -266,7 +266,7 @@
      * a combined graph with each one as a component
      * It doesn't support merging of shared plans
      * @param inpPlan
-     * @return
+     * @return this pointer
      * @throws PlanException
      */
     public OperatorPlan<E> merge(OperatorPlan<E> inpPlan) throws PlanException {

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/plan/PlanVisitor.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/plan/PlanVisitor.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/plan/PlanVisitor.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/plan/PlanVisitor.java Tue Jun 24 10:31:00 2008
@@ -68,7 +68,7 @@
     /**
      * Push the current walker onto the stack of saved walkers and begin using
      * the newly passed walker as the current walker.
-     * @param newWalker new walker to set as the current walker.
+     * @param walker new walker to set as the current walker.
      */
     protected void pushWalker(PlanWalker<O, P> walker) {
         mWalkers.push(mCurrentWalker);

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Rule.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Rule.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Rule.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Rule.java Tue Jun 24 10:31:00 2008
@@ -42,12 +42,12 @@
     public Transformer<O, P> transformer;
 
     /**
-     * @param nodes List of node types to look for.
-     * @param edges Map of integers to integers.  Each integer
+     * @param n List of node types to look for.
+     * @param e Map of integers to integers.  Each integer
      * represents the offset into nodes list.
-     * @param required List of boolean indicating whether given nodes are
+     * @param r List of boolean indicating whether given nodes are
      * required for the pattern to match.
-     * @param transformer Transformer to apply if the rule matches.
+     * @param t Transformer to apply if the rule matches.
      */
     public Rule(List<String> n,
                 Map<Integer, Integer> e, 

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Transformer.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Transformer.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Transformer.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/plan/optimizer/Transformer.java Tue Jun 24 10:31:00 2008
@@ -45,7 +45,7 @@
      * @param nodes - List of nodes declared in transform ($1 = nodes[0],
     * etc.)  Remember that some entries in node[] may be NULL since they may
      * not be created until after the transform.
-     * @returns - true if the transform should be done.
+     * @return - true if the transform should be done.
      */
     public abstract boolean check(List<O> nodes);
 

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/util/JarManager.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/util/JarManager.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/util/JarManager.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/util/JarManager.java Tue Jun 24 10:31:00 2008
@@ -91,7 +91,6 @@
      * @param funcs
      *            the functions that will be used in a job and whose jar files need to be included
      *            in the final merged jar file.
-     * @return the temporary path to the merged jar file.
      * @throws ClassNotFoundException
      * @throws IOException
      */
@@ -248,7 +247,6 @@
      * Find a jar that contains a class of the same name, if any. It will return a jar file, even if
      * that is not the first thing on the class path that has a class with the same name.
      * 
-     * @author Owen O'Malley (Copied from JobConf)
      * @param my_class
      *            the class to find
      * @return a jar file that contains the class, or null

Modified: incubator/pig/branches/types/src/org/apache/pig/impl/util/Spillable.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/impl/util/Spillable.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/impl/util/Spillable.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/impl/util/Spillable.java Tue Jun 24 10:31:00 2008
@@ -23,12 +23,12 @@
     /**
      * Instructs an object to spill whatever it can to disk and release
      * references to any data structures it spills.
-     * @returns number of objects spilled.
+     * @return number of objects spilled.
      */
     long spill();
     
     /**
      * Requests that an object return an estimate of its in memory size.
-     * @returns estimated in memory size. */
+     * @return estimated in memory size. */
     long getMemorySize();
 }

Added: incubator/pig/branches/types/src/org/apache/pig/package.html
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/package.html?rev=671269&view=auto
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/package.html (added)
+++ incubator/pig/branches/types/src/org/apache/pig/package.html Tue Jun 24 10:31:00 2008
@@ -0,0 +1,12 @@
+<html>
+<body>
+
+<p>
+Public interfaces and classes for Pig.
+<p>
+{@link org.apache.pig.PigServer} is the interface to Pig for use by other java
+programs.  Other interfaces and classes in this package are for use by users
+when implementing user defined functions.
+
+</body>
+</html>

Modified: incubator/pig/branches/types/src/org/apache/pig/tools/cmdline/CmdLineParser.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/tools/cmdline/CmdLineParser.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/tools/cmdline/CmdLineParser.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/tools/cmdline/CmdLineParser.java Tue Jun 24 10:31:00 2008
@@ -56,7 +56,7 @@
  * @param c Single character designator for this option.  It cannot be '-'.
  * @param s Full word designator for this option.  This can be null, in which case
  * no long designator will exist for this option.
- * @param valueExpected If REQUIRED, a value will be expected with this option.  If
+ * @param ve If REQUIRED, a value will be expected with this option.  If
  * OPTIONAL a value will be accepted if it is seen.
  * @throws AssertionError if there is no short option, or if this option has already been
  * used.

Modified: incubator/pig/branches/types/src/org/apache/pig/tools/parameters/PreprocessorContext.java
URL: http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/tools/parameters/PreprocessorContext.java?rev=671269&r1=671268&r2=671269&view=diff
==============================================================================
--- incubator/pig/branches/types/src/org/apache/pig/tools/parameters/PreprocessorContext.java (original)
+++ incubator/pig/branches/types/src/org/apache/pig/tools/parameters/PreprocessorContext.java Tue Jun 24 10:31:00 2008
@@ -115,7 +115,7 @@
      *
      * @param key - parameter name
      * @param val - value supplied for the key
-     * @param overwide - specifies whether the value should be replaced if it already exists
+     * @param overwrite - specifies whether the value should be replaced if it already exists
      */
     public  void processOrdLine(String key, String val, Boolean overwrite) {
 



Mime
View raw message