pig-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "liyunzhang_intel (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (PIG-4276) Fix ordering related failures in TestEvalPipeline for Spark
Date Tue, 21 Apr 2015 00:33:59 GMT

    [ https://issues.apache.org/jira/browse/PIG-4276?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14504032#comment-14504032
] 

liyunzhang_intel commented on PIG-4276:
---------------------------------------

[~mohitsabharwal]:
In other places, you introduce an ORDER BY command to sort the GROUP BY or DISTINCT output.
I don't think it is a good idea to change the original script. I think the following is better (you
can also ask Rohini or other Pig committers for their suggestions):
{code}
 @Test
    public void testNestedPlan() throws Exception{
        // Input file: for every key in [0, LOOP_COUNT) each even value below LOOP_COUNT,
        // written twice so that the nested DISTINCT has duplicates to remove.
        int LOOP_COUNT = 10;
        File tmpFile = Util.createTempFileDelOnExit("test", "txt");
        PrintStream out = new PrintStream(new FileOutputStream(tmpFile));
        for (int key = 0; key < LOOP_COUNT; key++) {
            for (int value = 0; value < LOOP_COUNT; value += 2) {
                String line = key + "\t" + value;
                out.println(line);
                out.println(line);
            }
        }
        out.close();

        pigServer.registerQuery("A = LOAD '"
                + Util.generateURI(tmpFile.toString(), pigContext) + "';");
        pigServer.registerQuery("B = group A by $0;");

        // Nested plan; generated fields by position:
        // 0 = (int)group, 1 = Identity(*), 2 = COUNT(C2), 3 = SUM(C2.$1),
        // 4 = TitleNGrams(C3), 5 = MAX(C3.$1), 6 = C2
        StringBuilder nestedQuery = new StringBuilder("C = foreach B {");
        nestedQuery.append("C1 = filter A by $0 > -1;");
        nestedQuery.append("C2 = distinct C1;");
        nestedQuery.append("C3 = distinct A;");
        nestedQuery.append("generate (int)group,")
                .append(Identity.class.getName())
                .append("(*), COUNT(C2), SUM(C2.$1),");
        nestedQuery.append(TitleNGrams.class.getName())
                .append("(C3), MAX(C3.$1), C2;");
        nestedQuery.append("};");

        pigServer.registerQuery(nestedQuery.toString());
        Iterator<Tuple> results = pigServer.openIterator("C");
        if (!results.hasNext()) {
            Assert.fail("No output found");
        }

        // The running counter doubles as the expected group key, so these assertions
        // only hold when the groups come back in ascending key order.
        int groupsSeen = 0;
        for (; results.hasNext(); ++groupsSeen) {
            Tuple row = results.next();
            Assert.assertEquals(Integer.valueOf(groupsSeen), (Integer) row.get(0));
            Assert.assertEquals(Long.valueOf(5L), (Long) row.get(2));
            Assert.assertEquals(LOOP_COUNT * 2.0, (Double) row.get(3), 0.01);
            Assert.assertEquals(8.0, (Double) row.get(5), 0.01);
            Assert.assertEquals(5L, ((DataBag) row.get(6)).size());
            Assert.assertEquals(7, row.size());
        }
        Assert.assertEquals(LOOP_COUNT, groupsSeen);
    }
{code}

can be rewritten as:
{code}
@Test
    public void testNestedPlan() throws Exception{
        // Input file: for every key in [0, LOOP_COUNT) each even value below LOOP_COUNT,
        // written twice so that the nested DISTINCT has duplicates to remove.
        int LOOP_COUNT = 10;
        File tmpFile = Util.createTempFileDelOnExit("test", "txt");
        PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
        for(int i = 0; i < LOOP_COUNT; i++) {
            for(int j=0;j<LOOP_COUNT;j+=2){
                ps.println(i+"\t"+j);
                ps.println(i+"\t"+j);
            }
        }
        ps.close();
        pigServer.registerQuery("A = LOAD '"
                + Util.generateURI(tmpFile.toString(), pigContext) + "';");
        pigServer.registerQuery("B = group A by $0;");
        // Generated fields by position:
        // 0 = (int)group, 1 = Identity(*), 2 = COUNT(C2), 3 = SUM(C2.$1),
        // 4 = TitleNGrams(C3), 5 = MAX(C3.$1), 6 = C2
        String query = "C = foreach B {"
        + "C1 = filter A by $0 > -1;"
        + "C2 = distinct C1;"
        + "C3 = distinct A;"
        + "generate (int)group," + Identity.class.getName() +"(*), COUNT(C2), SUM(C2.$1),"
        + TitleNGrams.class.getName() + "(C3), MAX(C3.$1), C2;"
        + "};";

        pigServer.registerQuery(query);
        Iterator<Tuple> iter = pigServer.openIterator("C");
        if(!iter.hasNext()) Assert.fail("No output found");

        // One expected tuple per group key:
        // (key, COUNT(C2), SUM(C2.$1), MAX(C3.$1), size of bag C2, arity of the output tuple).
        List<String> expectedStrResults = new ArrayList<String>();
        for(int i=0;i<LOOP_COUNT;i++){
            StringBuilder sb = new StringBuilder();
            sb.append("(").append(i).append(",");
            sb.append("5L").append(",");
            sb.append(Double.toString(LOOP_COUNT*2.0)).append(",");
            sb.append("8.0").append(",");
            sb.append("5L").append(",");
            sb.append("7").append(")");
            expectedStrResults.add(sb.toString());
        }

        List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings(
                expectedStrResults.toArray(new String[0]));

        // Project each output tuple down to the fields being verified, in the same
        // layout as the expected tuples above.
        List<Tuple> actualResults = new ArrayList<Tuple>();
        while(iter.hasNext()){
            Tuple t = iter.next();
            Tuple actualTuple = TupleFactory.getInstance().newTuple(6);
            actualTuple.set(0,t.get(0));
            actualTuple.set(1,(Long)t.get(2));
            actualTuple.set(2,(Double)t.get(3));
            actualTuple.set(3,(Double)t.get(5));
            actualTuple.set(4,((DataBag)t.get(6)).size());
            actualTuple.set(5,t.size());
            actualResults.add(actualTuple);
        }

        // Count the tuples actually read. The previous version compared LOOP_COUNT with a
        // counter that was incremented while building the expected strings, so that check
        // could never fail.
        Assert.assertEquals(LOOP_COUNT, actualResults.size());
        // Order-independent comparison, as the helper's name indicates.
        Util.checkQueryOutputsAfterSort(actualResults.iterator(), expectedResults);
    }
{code}


> Fix ordering related failures in TestEvalPipeline for Spark
> -----------------------------------------------------------
>
>                 Key: PIG-4276
>                 URL: https://issues.apache.org/jira/browse/PIG-4276
>             Project: Pig
>          Issue Type: Sub-task
>          Components: spark
>            Reporter: liyunzhang_intel
>            Assignee: Mohit Sabharwal
>             Fix For: spark-branch
>
>         Attachments: PIG-4276.patch, TEST-org.apache.pig.test.TestEvalPipeline.txt
>
>
> error log is attached



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message