Return-Path: Delivered-To: apmail-hadoop-pig-commits-archive@www.apache.org Received: (qmail 38961 invoked from network); 10 Feb 2010 02:31:10 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 10 Feb 2010 02:31:10 -0000 Received: (qmail 22287 invoked by uid 500); 10 Feb 2010 02:31:10 -0000 Delivered-To: apmail-hadoop-pig-commits-archive@hadoop.apache.org Received: (qmail 22238 invoked by uid 500); 10 Feb 2010 02:31:09 -0000 Mailing-List: contact pig-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: pig-dev@hadoop.apache.org Delivered-To: mailing list pig-commits@hadoop.apache.org Received: (qmail 22229 invoked by uid 500); 10 Feb 2010 02:31:09 -0000 Delivered-To: apmail-incubator-pig-commits@incubator.apache.org Received: (qmail 22226 invoked by uid 99); 10 Feb 2010 02:31:09 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 10 Feb 2010 02:31:09 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 10 Feb 2010 02:31:07 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id B180C23888E8; Wed, 10 Feb 2010 02:30:45 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r908324 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java src/org/apache/pig/data/NonSpillableDataBag.java Date: Wed, 10 Feb 2010 02:30:45 -0000 To: pig-commits@incubator.apache.org From: hashutosh@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100210023045.B180C23888E8@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: hashutosh Date: Wed Feb 10 02:30:45 2010 New Revision: 908324 URL: http://svn.apache.org/viewvc?rev=908324&view=rev Log: PIG-1230: Streaming input in POJoinPackage should use nonspillable bag to collect tuples Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=908324&r1=908323&r2=908324&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Feb 10 02:30:45 2010 @@ -24,6 +24,9 @@ IMPROVEMENTS +PIG-1230: Streaming input in POJoinPackage should use nonspillable bag to +collect tuples (ashutoshc) + PIG-1224: Collected group should change to use new (internal) bag (ashutoshc) PIG-1046: join algorithm specification is within double quotes (ashutoshc) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java?rev=908324&r1=908323&r2=908324&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POJoinPackage.java Wed Feb 10 02:30:45 2010 @@ -29,6 +29,7 @@ import org.apache.pig.data.DataBag; import org.apache.pig.data.DataType; import org.apache.pig.data.InternalCachedBag; +import org.apache.pig.data.NonSpillableDataBag; import org.apache.pig.data.Tuple; import org.apache.pig.impl.io.NullableTuple; import org.apache.pig.impl.plan.NodeIdGenerator; @@ -140,14 +141,16 @@ lastInputTuple = false; //Put n-1 inputs into bags dbs = new DataBag[numInputs]; - for (int i = 0; i < numInputs; i++) { + for (int i = 0; i < numInputs - 1; i++) { dbs[i] = useDefaultBag ? BagFactory.getInstance().newDefaultBag() // In a very rare case if there is a POStream after this // POJoinPackage in the pipeline and is also blocking the pipeline; // constructor argument should be 2 * numInputs. But for one obscure // case we don't want to pay the penalty all the time. - : new InternalCachedBag(numInputs); + : new InternalCachedBag(numInputs-1); } + // For last bag, we always use NonSpillableBag. + dbs[lastBagIndex] = new NonSpillableDataBag((int)chunkSize); //For each Nullable tuple in the input, put it //into the corresponding bag based on the index, Modified: hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java?rev=908324&r1=908323&r2=908324&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java Wed Feb 10 02:30:45 2010 @@ -55,6 +55,15 @@ } /** + * Use this constructor if you know upfront how many tuples you are going + * to put in this bag. + * @param tupleCount + */ + public NonSpillableDataBag(int tupleCount){ + mContents = new ArrayList(tupleCount); + } + + /** * This constructor creates a bag out of an existing list * of tuples by taking ownership of the list and NOT * copying the contents of the list.