pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r908324 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java src/org/apache/pig/data/NonSpillableDataBag.java
Date Wed, 10 Feb 2010 02:30:45 GMT
Author: hashutosh
Date: Wed Feb 10 02:30:45 2010
New Revision: 908324

URL: http://svn.apache.org/viewvc?rev=908324&view=rev
Log:
PIG-1230: Streaming input in POJoinPackage should use nonspillable bag to collect tuples

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java
    hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=908324&r1=908323&r2=908324&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Wed Feb 10 02:30:45 2010
@@ -24,6 +24,9 @@
 
 IMPROVEMENTS
 
+PIG-1230: Streaming input in POJoinPackage should use nonspillable bag to
+collect tuples (ashutoshc) 
+
 PIG-1224: Collected group should change to use new (internal) bag (ashutoshc)
 
 PIG-1046: join algorithm specification is within double quotes (ashutoshc)

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java?rev=908324&r1=908323&r2=908324&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java Wed Feb 10 02:30:45 2010
@@ -29,6 +29,7 @@
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataType;
 import org.apache.pig.data.InternalCachedBag;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.io.NullableTuple;
 import org.apache.pig.impl.plan.NodeIdGenerator;
@@ -140,14 +141,16 @@
             lastInputTuple = false;
             //Put n-1 inputs into bags
             dbs = new DataBag[numInputs];
-            for (int i = 0; i < numInputs; i++) {
+            for (int i = 0; i < numInputs - 1; i++) {
                 dbs[i] = useDefaultBag ? BagFactory.getInstance().newDefaultBag() 
                 // In a very rare case if there is a POStream after this 
                 // POJoinPackage in the pipeline and is also blocking the pipeline;
                 // constructor argument should be 2 * numInputs. But for one obscure
                 // case we don't want to pay the penalty all the time.        
-                        : new InternalCachedBag(numInputs);                    
+                        : new InternalCachedBag(numInputs-1);                    
             }
+            // For last bag, we always use NonSpillableBag.
+            dbs[lastBagIndex] = new NonSpillableDataBag((int)chunkSize);
             
             //For each Nullable tuple in the input, put it
             //into the corresponding bag based on the index,

Modified: hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java?rev=908324&r1=908323&r2=908324&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java Wed Feb 10 02:30:45 2010
@@ -55,6 +55,15 @@
     }
 
     /**
+     * Use this constructor if you know upfront how many tuples you are going
+     * to put in this bag.
+     * @param tupleCount
+     */
+    public NonSpillableDataBag(int tupleCount){
+        mContents = new ArrayList<Tuple>(tupleCount);
+    }
+    
+    /**
      * This constructor creates a bag out of an existing list
      * of tuples by taking ownership of the list and NOT
      * copying the contents of the list.



Mime
View raw message