pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dvrya...@apache.org
Subject svn commit: r1359793 - in /pig/trunk: CHANGES.txt src/org/apache/pig/builtin/CubeDimensions.java src/org/apache/pig/parser/LogicalPlanBuilder.java test/org/apache/pig/test/TestCubeOperator.java
Date Tue, 10 Jul 2012 17:18:50 GMT
Author: dvryaboy
Date: Tue Jul 10 17:18:50 2012
New Revision: 1359793

URL: http://svn.apache.org/viewvc?rev=1359793&view=rev
Log:
PIG-2726: Handling legitimate NULL values in Cube operator

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java
    pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java
    pig/trunk/test/org/apache/pig/test/TestCubeOperator.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1359793&r1=1359792&r2=1359793&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jul 10 17:18:50 2012
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-2726: Handling legitimate NULL values in Cube operator (prasanth_j via dvryaboy)
+
 PIG-2808: Add *.project to .gitignore (azaroth)
 
 PIG-2806: Fix merge join test regression (jcoveney)

Modified: pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java?rev=1359793&r1=1359792&r2=1359793&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/CubeDimensions.java Tue Jul 10 17:18:50 2012
@@ -70,6 +70,7 @@ public class CubeDimensions extends Eval
     private static BagFactory bf = BagFactory.getInstance();
     private static TupleFactory tf = TupleFactory.getInstance();
     private final String allMarker;
+    private final String unknown = "unknown";
 
     public CubeDimensions() {
         this(null);
@@ -81,11 +82,26 @@ public class CubeDimensions extends Eval
     @Override
     public DataBag exec(Tuple tuple) throws IOException {
         List<Tuple> result = Lists.newArrayListWithCapacity((int) Math.pow(2, tuple.size()));
+        Tuple nonNullTuple = convertNullToUnknown(tuple);
         Tuple newt = tf.newTuple(tuple.size());
-        recursivelyCube(result, tuple, 0, newt);
+        recursivelyCube(result, nonNullTuple, 0, newt);
         return bf.newDefaultBag(result);
     }
 
+    // if the dimension values contain null then replace it with "unknown" value
+    // since null will be used for rollups
+    private Tuple convertNullToUnknown(Tuple tuple) throws ExecException {
+	Tuple nonNullTup = tf.newTuple(tuple.getAll());
+	int idx = 0;
+	for(Object obj : tuple.getAll()) {
+	    if( (obj == null) ) {
+		nonNullTup.set(idx, unknown);
+	    }
+	    idx++;
+	}
+	return nonNullTup;
+    }
+    
     private void recursivelyCube(List<Tuple> result, Tuple input, int index, Tuple newt) throws ExecException {
         newt.set(index, input.get(index));
         if (index == input.size() - 1 ) {

Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java?rev=1359793&r1=1359792&r2=1359793&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java Tue Jul 10 17:18:50 2012
@@ -462,7 +462,7 @@ public class LogicalPlanBuilder {
 
 	// Create UDF with user specified dimensions 
 	LogicalExpressionPlan uexpPlan = new LogicalExpressionPlan();
-	new UserFuncExpression(uexpPlan, new FuncSpec(CubeDimensions.class.getName(), "NULL"), lexpList);
+	new UserFuncExpression(uexpPlan, new FuncSpec(CubeDimensions.class.getName()), lexpList);
 	for (LogicalExpressionPlan lexp : lexpPlanList) {
 	    Iterator<Operator> it = lexp.getOperators();
 	    while (it.hasNext()) {

Modified: pig/trunk/test/org/apache/pig/test/TestCubeOperator.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestCubeOperator.java?rev=1359793&r1=1359792&r2=1359793&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestCubeOperator.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestCubeOperator.java Tue Jul 10 17:18:50 2012
@@ -44,8 +44,8 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
 
 public class TestCubeOperator {
     private static PigServer pigServer;
@@ -82,6 +82,11 @@ public class TestCubeOperator {
 		tuple("u10,women,green,apple"),
 		tuple("u11,men,red,apple"),
 		tuple("u12,women,green,mango"));
+	
+	data.set("input3", 
+		tuple("dog", "miami", 12),
+		tuple(null, "miami", 18));
+
     }
 
     @AfterClass
@@ -94,25 +99,25 @@ public class TestCubeOperator {
 	String query =
 		"a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
 			"b = cube a by (x,y);" +
-			"c = foreach b generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.z) as total;" +
+			"c = foreach b generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.z) as total;" +
 			"store c into 'output' using mock.Storage();";
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -130,26 +135,26 @@ public class TestCubeOperator {
 			"a = load 'input' USING mock.Storage() as (x,y:chararray,z:long);" +
 			"a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
 			"b = cube a by (x,y);" + 
-			"c = foreach b generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.z) as total;" +
+			"c = foreach b generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.z) as total;" +
 			"store c into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -166,26 +171,26 @@ public class TestCubeOperator {
 		"a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
 			"b = foreach a generate x as type,y as location,z as number;" +
 			"c = cube b by (type,location);" + 
-			"d = foreach c generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.number) as total;" +
+			"d = foreach c generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.number) as total;" +
 			"store d into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -208,12 +213,12 @@ public class TestCubeOperator {
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)18)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)12)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)30))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)18)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)12)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)30)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)30))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -230,26 +235,26 @@ public class TestCubeOperator {
 		"a = load 'input' USING mock.Storage() as (x:chararray,y:chararray);" +
 			"b = foreach a generate x as type,y as location;" +
 			"c = cube b by (*);" + 
-			"d = foreach c generate flatten(group) as (type,location), COUNT(cube) as count;" +
+			"d = foreach c generate flatten(group) as (type,location), COUNT_STAR(cube) as count;" +
 			"store d into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1)),
-		tf.newTuple(ImmutableList.of("cat", "naples", (long)1)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)2)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1)),
-		tf.newTuple(ImmutableList.of("dog", "naples", (long)1)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)3)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)3)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1)),
+		tf.newTuple(Lists.newArrayList("cat", "naples", (long)1)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)2)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1)),
+		tf.newTuple(Lists.newArrayList("dog", "naples", (long)1)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)3)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)3)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)7))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -266,26 +271,26 @@ public class TestCubeOperator {
 		"a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
 			"b = foreach a generate x as type,y as location, z as number;" +
 			"c = cube b by ($0..$1);" + 
-			"d = foreach c generate flatten(group) as (type,location), COUNT(cube) as count, SUM(cube.number) as total;" +
+			"d = foreach c generate flatten(group) as (type,location), COUNT_STAR(cube) as count, SUM(cube.number) as total;" +
 			"store d into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -301,7 +306,7 @@ public class TestCubeOperator {
 	String query = "a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);"
 		+ "b = foreach a generate x as type,y as location, z as number;"
 		+ "c = cube b by ($0..$1,$0..$1);"
-		+ "d = foreach c generate flatten(group), COUNT(cube) as count, SUM(cube.number) as total;"
+		+ "d = foreach c generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.number) as total;"
 		+ "store d into 'output' using mock.Storage();";
 
 	try {
@@ -323,21 +328,21 @@ public class TestCubeOperator {
 		"a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
 			"b = filter a by x == 'dog';" +
 			"c = cube b by (x,y);" + 
-			"d = foreach c generate flatten(group), COUNT(cube) as count, SUM(cube.z) as total;" +
+			"d = foreach c generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.z) as total;" +
 			"store d into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 	// Iterator<Tuple> it = pigServer.openIterator("d");
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)1, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)3, (long)31))
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)1, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)3, (long)31))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -354,26 +359,26 @@ public class TestCubeOperator {
 		"a = load 'input' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
 			"b = order a by $2;" +
 			"c = cube b by (x,y);" + 
-			"d = foreach c generate flatten(group), COUNT(cube) as count, SUM(cube.z) as total;" +
+			"d = foreach c generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.z) as total;" +
 			"store d into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "naples", (long)1, (long)9)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)2, (long)27)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "naples", (long)1, (long)5)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)3, (long)31)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)3, (long)15)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)7, (long)63))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", "naples", (long)1, (long)9)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)2, (long)27)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", "naples", (long)1, (long)5)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)3, (long)31)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)3, (long)15)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)7, (long)63))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -390,24 +395,24 @@ public class TestCubeOperator {
 			"b = load 'input' USING mock.Storage() as (a2,b2,c2:long,d2:chararray);" +
 			"c = join a by a1, b by d2;" +
 			"d = cube c by ($4,$5);" + 
-			"e = foreach d generate flatten(group), COUNT(cube) as count, SUM(cube.c2) as total;" +
+			"e = foreach d generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.c2) as total;" +
 			"store e into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)2, (long)26)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)5, (long)49))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)2, (long)26)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)5, (long)49))
 		);
 
 	List<Tuple> out = data.get("output");
@@ -425,30 +430,82 @@ public class TestCubeOperator {
 			"c = cogroup a by a1, b by d2;" +
 			"d = foreach c generate flatten(a), flatten(b);" +
 			"e = cube d by (a2,b2);" +
-			"f = foreach e generate flatten(group), COUNT(cube) as count, SUM(cube.c2) as total;" +
+			"f = foreach e generate flatten(group), COUNT_STAR(cube) as count, SUM(cube.c2) as total;" +
 			"store f into 'output' using mock.Storage();";
 
 	Util.registerMultiLineQuery(pigServer, query);
 
 	Set<Tuple> expected = ImmutableSet.of(
-		tf.newTuple(ImmutableList.of("cat", "miami", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("cat", "NULL", (long)1, (long)18)),
-		tf.newTuple(ImmutableList.of("dog", "miami", (long)1, (long)12)),
-		tf.newTuple(ImmutableList.of("dog", "tampa", (long)1, (long)14)),
-		tf.newTuple(ImmutableList.of("dog", "NULL", (long)2, (long)26)),
-		tf.newTuple(ImmutableList.of("turtle", "tampa", (long)1, (long)4)),
-		tf.newTuple(ImmutableList.of("turtle", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("turtle", "NULL", (long)2, (long)5)),
-		tf.newTuple(ImmutableList.of("NULL", "miami", (long)2, (long)30)),
-		tf.newTuple(ImmutableList.of("NULL", "tampa", (long)2, (long)18)),
-		tf.newTuple(ImmutableList.of("NULL", "naples", (long)1, (long)1)),
-		tf.newTuple(ImmutableList.of("NULL", "NULL", (long)5, (long)49))
+		tf.newTuple(Lists.newArrayList("cat", "miami", (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("cat", null, (long)1, (long)18)),
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)1, (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", "tampa", (long)1, (long)14)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)2, (long)26)),
+		tf.newTuple(Lists.newArrayList("turtle", "tampa", (long)1, (long)4)),
+		tf.newTuple(Lists.newArrayList("turtle", "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList("turtle", null, (long)2, (long)5)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)2, (long)30)),
+		tf.newTuple(Lists.newArrayList(null, "tampa", (long)2, (long)18)),
+		tf.newTuple(Lists.newArrayList(null, "naples", (long)1, (long)1)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)5, (long)49))
+		);
+
+	List<Tuple> out = data.get("output");
+	for( Tuple tup : out ) {
+	    assertTrue(expected+" contains "+tup, expected.contains(tup));
+	}
+    }
+
+    @Test
+    public void testCubeWithNULLs() throws IOException {
+	// test for dimension values with legitimate null values
+	String query = 
+		"a = load 'input3' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
+			"b = cube a by (x,y);" + 
+			"c = foreach b generate flatten(group) as (type,location), SUM(cube.z) as total;" +
+			"store c into 'output' using mock.Storage();";
+
+	Util.registerMultiLineQuery(pigServer, query);
+
+	Set<Tuple> expected = ImmutableSet.of(
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)12)),
+		tf.newTuple(Lists.newArrayList(null, "miami", (long)30)),
+		tf.newTuple(Lists.newArrayList(null, null, (long)30)),
+		tf.newTuple(Lists.newArrayList("unknown", "miami", (long)18)),
+		tf.newTuple(Lists.newArrayList("unknown", null, (long)18))
 		);
 
 	List<Tuple> out = data.get("output");
 	for( Tuple tup : out ) {
 	    assertTrue(expected+" contains "+tup, expected.contains(tup));
 	}
+
+    }
+    
+    @Test
+    public void testCubeWithNULLAndFilter() throws IOException {
+	// test for dimension values with legitimate null values
+	// followed by filter
+	String query = 
+		"a = load 'input3' USING mock.Storage() as (x:chararray,y:chararray,z:long);" +
+			"b = cube a by (x,y);" + 
+			"c = foreach b generate flatten(group) as (type,location), SUM(cube.z) as total;" +
+			"d = filter c by type!='unknown';" +
+			"store d into 'output' using mock.Storage();";
+
+	Util.registerMultiLineQuery(pigServer, query);
+
+	Set<Tuple> expected = ImmutableSet.of(
+		tf.newTuple(Lists.newArrayList("dog", "miami", (long)12)),
+		tf.newTuple(Lists.newArrayList("dog", null, (long)12))
+		);
+
+	List<Tuple> out = data.get("output");
+	for( Tuple tup : out ) {
+	    assertTrue(expected+" contains "+tup, expected.contains(tup));
+	}
+
     }
 
     @Test
@@ -474,7 +531,7 @@ public class TestCubeOperator {
 	ByteArrayOutputStream baos = new ByteArrayOutputStream();
 	PrintStream ps = new PrintStream(baos);
 	pigServer.explain("b", ps);
-	assertTrue(baos.toString().contains("CubeDimensions('NULL')"));
+	assertTrue(baos.toString().contains("CubeDimensions"));
     }
 
     @Test



Mime
View raw message