Return-Path: X-Original-To: apmail-pig-commits-archive@www.apache.org Delivered-To: apmail-pig-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 60AFEC038 for ; Tue, 10 Jul 2012 17:19:12 +0000 (UTC) Received: (qmail 95866 invoked by uid 500); 10 Jul 2012 17:19:12 -0000 Delivered-To: apmail-pig-commits-archive@pig.apache.org Received: (qmail 95837 invoked by uid 500); 10 Jul 2012 17:19:12 -0000 Mailing-List: contact commits-help@pig.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pig.apache.org Delivered-To: mailing list commits@pig.apache.org Received: (qmail 95829 invoked by uid 99); 10 Jul 2012 17:19:12 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 10 Jul 2012 17:19:12 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 10 Jul 2012 17:19:10 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id A6D472388A6E for ; Tue, 10 Jul 2012 17:18:50 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1359793 - in /pig/trunk: CHANGES.txt src/org/apache/pig/builtin/CubeDimensions.java src/org/apache/pig/parser/LogicalPlanBuilder.java test/org/apache/pig/test/TestCubeOperator.java Date: Tue, 10 Jul 2012 17:18:50 -0000 To: commits@pig.apache.org From: dvryaboy@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120710171850.A6D472388A6E@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: dvryaboy Date: Tue Jul 10 17:18:50 2012 New Revision: 1359793 URL: http://svn.apache.org/viewvc?rev=1359793&view=rev Log: PIG-2726: Handling legitimate NULL values in Cube operator Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java pig/trunk/test/org/apache/pig/test/TestCubeOperator.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1359793&r1=1359792&r2=1359793&view=diff ============================================================================== --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Tue Jul 10 17:18:50 2012 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-2726: Handling legitimate NULL values in Cube operator (prasanth_j via dvryaboy) + PIG-2808: Add *.project to .gitignore (azaroth) PIG-2806: Fix merge join test regression (jcoveney) Modified: pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java?rev=1359793&r1=1359792&r2=1359793&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java (original) +++ pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java Tue Jul 10 17:18:50 2012 @@ -70,6 +70,7 @@ public class CubeDimensions extends Eval private static BagFactory bf = BagFactory.getInstance(); private static TupleFactory tf = TupleFactory.getInstance(); private final String allMarker; + private final String unknown = "unknown"; public CubeDimensions() { this(null); @@ -81,11 +82,26 @@ public class CubeDimensions extends Eval @Override public DataBag exec(Tuple tuple) throws IOException { List result = Lists.newArrayListWithCapacity((int) Math.pow(2, tuple.size())); + Tuple nonNullTuple = convertNullToUnknown(tuple); Tuple newt = tf.newTuple(tuple.size()); - recursivelyCube(result, tuple, 0, newt); + recursivelyCube(result, nonNullTuple, 0, newt); return bf.newDefaultBag(result); } + // if the dimension values contain null then replace it with "unknown" value + // since null will be used for rollups + private Tuple convertNullToUnknown(Tuple tuple) throws ExecException { + Tuple nonNullTup = tf.newTuple(tuple.getAll()); + int idx = 0; + for(Object obj : tuple.getAll()) { + if( (obj == null) ) { + nonNullTup.set(idx, unknown); + } + idx++; + } + return nonNullTup; + } + private void recursivelyCube(List result, Tuple input, int index, Tuple newt) throws ExecException { newt.set(index, input.get(index)); if (index == input.size() - 1 ) { Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java?rev=1359793&r1=1359792&r2=1359793&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java (original) +++ pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java Tue Jul 10 17:18:50 2012 @@ -462,7 +462,7 @@ public class LogicalPlanBuilder { // Create UDF with user specified dimensions LogicalExpressionPlan uexpPlan = new LogicalExpressionPlan(); - new UserFuncExpression(uexpPlan, new FuncSpec(CubeDimensions.class.getName(), "NULL"), lexpList); + new UserFuncExpression(uexpPlan, new FuncSpec(CubeDimensions.class.getName()), lexpList); for (LogicalExpressionPlan lexp : lexpPlanList) { Iterator it = lexp.getOperators(); while (it.hasNext()) { Modified: pig/trunk/test/org/apache/pig/test/TestCubeOperator.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestCubeOperator.java?rev=1359793&r1=1359792&r2=1359793&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/TestCubeOperator.java (original) +++ pig/trunk/test/org/apache/pig/test/TestCubeOperator.java Tue Jul 10 17:18:50 2012 @@ -44,8 +44,8 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; public class TestCubeOperator { private static PigServer pigServer; @@ -82,6 +82,11 @@ public class TestCubeOperator { tuple("u10,women,green,apple"), tuple("u11,men,red,apple"), tuple("u12,women,green,mango")); + + data.set("input3", + tuple("dog", "miami", 12), + tuple(null, "miami", 18)); + } @AfterClass @@ -94,25 +99,25 @@ public class TestCubeOperator { String query = "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + "b = cube a by (x,y);" + - "c = foreach b generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.z) as total;" + + "c = foreach b generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.z) as total;" + "store c into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)), + tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)), + tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)), + tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63)) ); List out = data.get("output"); @@ -130,26 +135,26 @@ public class TestCubeOperator { "a = load 'input' USING mock.Storage() as (x,y:chararray,z:long);" + "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + "b = cube a by (x,y);" + - "c = foreach b generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.z) as total;" + + "c = foreach b generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.z) as total;" + "store c into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)), + tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)), + tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)), + tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63)) ); List out = data.get("output"); @@ -166,26 +171,26 @@ public class TestCubeOperator { "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + "b = foreach a generate x as type,y as location,z as number;" + "c = cube b by (type,location);" + - "d = foreach c generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.number) as total;" + + "d = foreach c generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.number) as total;" + "store d into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)), + tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)), + tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)), + tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63)) ); List out = data.get("output"); @@ -208,12 +213,12 @@ public class TestCubeOperator { Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)18)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)18)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)12)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)12)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)30)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)30)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)18)), + tf.newTuple(Lists.newArrayList("cat", null, (long)18)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)12)), + tf.newTuple(Lists.newArrayList("dog", null, (long)12)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)30)), + tf.newTuple(Lists.newArrayList(null, null, (long)30)) ); List out = data.get("output"); @@ -230,26 +235,26 @@ public class TestCubeOperator { "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray);" + "b = foreach a generate x as type,y as location;" + "c = cube b by (*);" + - "d = foreach c generate flatten(group) as (type,location), COUNT(cube) as count;" + + "d = foreach c generate flatten(group) as (type,location), COUNT_STAR(cube) as count;" + "store d into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1)), - tf.newTuple(ImmutableList.of("cat", "naples", (long)1)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)2)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1)), - tf.newTuple(ImmutableList.of("dog", "naples", (long)1)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)3)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)3)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1)), + tf.newTuple(Lists.newArrayList("cat", "naples", (long)1)), + tf.newTuple(Lists.newArrayList("cat", null, (long)2)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1)), + tf.newTuple(Lists.newArrayList("dog", "naples", (long)1)), + tf.newTuple(Lists.newArrayList("dog", null, (long)3)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)3)), + tf.newTuple(Lists.newArrayList(null, null, (long)7)) ); List out = data.get("output"); @@ -266,26 +271,26 @@ public class TestCubeOperator { "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + "b = foreach a generate x as type,y as location, z as number;" + "c = cube b by ($0..$1);" + - "d = foreach c generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.number) as total;" + + "d = foreach c generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.number) as total;" + "store d into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)), + tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)), + tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)), + tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63)) ); List out = data.get("output"); @@ -301,7 +306,7 @@ public class TestCubeOperator { String query = "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + "b = foreach a generate x as type,y as location, z as number;" + "c = cube b by ($0..$1,$0..$1);" - + "d = foreach c generate flatten(group), COUNT(cube) as count, SUM(cube.number) as total;" + + "d = foreach c generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.number) as total;" + "store d into 'output' using mock.Storage();"; try { @@ -323,21 +328,21 @@ public class TestCubeOperator { "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + "b = filter a by x == 'dog';" + "c = cube b by (x,y);" + - "d = foreach c generate flatten(group), COUNT(cube) as count, SUM(cube.z) as total;" + + "d = foreach c generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.z) as total;" + "store d into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); // Iterator it = pigServer.openIterator("d"); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)1, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)3, (long)31)) + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)), + tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)1, (long)5)), + tf.newTuple(Lists.newArrayList(null, null, (long)3, (long)31)) ); List out = data.get("output"); @@ -354,26 +359,26 @@ public class TestCubeOperator { "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + "b = order a by $2;" + "c = cube b by (x,y);" + - "d = foreach c generate flatten(group), COUNT(cube) as count, SUM(cube.z) as total;" + + "d = foreach c generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.z) as total;" + "store d into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)), + tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)), + tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)), + tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63)) ); List out = data.get("output"); @@ -390,24 +395,24 @@ public class TestCubeOperator { "b = load 'input' USING mock.Storage() as (a2,b2,c2:long,d2:chararray);" + "c = join a by a1, b by d2;" + "d = cube c by ($4,$5);" + - "e = foreach d generate flatten(group), COUNT(cube) as count, SUM(cube.c2) as total;" + + "e = foreach d generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.c2) as total;" + "store e into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)2, (long)26)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)5, (long)49)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("cat", null, (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", null, (long)2, (long)26)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList(null, null, (long)5, (long)49)) ); List out = data.get("output"); @@ -425,30 +430,82 @@ public class TestCubeOperator { "c = cogroup a by a1, b by d2;" + "d = foreach c generate flatten(a), flatten(b);" + "e = cube d by (a2,b2);" + - "f = foreach e generate flatten(group), COUNT(cube) as count, SUM(cube.c2) as total;" + + "f = foreach e generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.c2) as total;" + "store f into 'output' using mock.Storage();"; Util.registerMultiLineQuery(pigServer, query); Set expected = ImmutableSet.of( - tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("cat", "NULL", (long)1, (long)18)), - tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)), - tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)), - tf.newTuple(ImmutableList.of("dog", "NULL", (long)2, (long)26)), - tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)), - tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)), - tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)), - tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)), - tf.newTuple(ImmutableList.of("NULL", "naples", (long)1, (long)1)), - tf.newTuple(ImmutableList.of("NULL", "NULL", (long)5, (long)49)) + tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("cat", null, (long)1, (long)18)), + tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)), + tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)), + tf.newTuple(Lists.newArrayList("dog", null, (long)2, (long)26)), + tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)), + tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)), + tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)), + tf.newTuple(Lists.newArrayList(null, "naples", (long)1, (long)1)), + tf.newTuple(Lists.newArrayList(null, null, (long)5, (long)49)) + ); + + List out = data.get("output"); + for( Tuple tup : out ) { + assertTrue(expected+" contains "+tup, expected.contains(tup)); + } + } + + @Test + public void testCubeWithNULLs() throws IOException { + // test for dimension values with legitimate null values + String query = + "a = load 'input3' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + + "b = cube a by (x,y);" + + "c = foreach b generate flatten(group) as (type,location), SUM(cube.z) as total;" + + "store c into 'output' using mock.Storage();"; + + Util.registerMultiLineQuery(pigServer, query); + + Set expected = ImmutableSet.of( + tf.newTuple(Lists.newArrayList("dog", "miami", (long)12)), + tf.newTuple(Lists.newArrayList("dog", null, (long)12)), + tf.newTuple(Lists.newArrayList(null, "miami", (long)30)), + tf.newTuple(Lists.newArrayList(null, null, (long)30)), + tf.newTuple(Lists.newArrayList("unknown", "miami", (long)18)), + tf.newTuple(Lists.newArrayList("unknown", null, (long)18)) ); List out = data.get("output"); for( Tuple tup : out ) { assertTrue(expected+" contains "+tup, expected.contains(tup)); } + + } + + @Test + public void testCubeWithNULLAndFilter() throws IOException { + // test for dimension values with legitimate null values + // followed by filter + String query = + "a = load 'input3' USING mock.Storage() as (x:chararray,y:chararray,z:long);" + + "b = cube a by (x,y);" + + "c = foreach b generate flatten(group) as (type,location), SUM(cube.z) as total;" + + "d = filter c by type!='unknown';" + + "store d into 'output' using mock.Storage();"; + + Util.registerMultiLineQuery(pigServer, query); + + Set expected = ImmutableSet.of( + tf.newTuple(Lists.newArrayList("dog", "miami", (long)12)), + tf.newTuple(Lists.newArrayList("dog", null, (long)12)) + ); + + List out = data.get("output"); + for( Tuple tup : out ) { + assertTrue(expected+" contains "+tup, expected.contains(tup)); + } + } @Test @@ -474,7 +531,7 @@ public class TestCubeOperator { ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); pigServer.explain("b", ps); - assertTrue(baos.toString().contains("CubeDimensions('NULL')")); + assertTrue(baos.toString().contains("CubeDimensions")); } @Test