pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From roh...@apache.org
Subject svn commit: r1729739 - in /pig/trunk: ./ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/backend/hadoop/executionengine/tez/plan/ test/org/apache/pig/test/data/GoldenFiles/tez/ test/org/apache/pig/tez/
Date Wed, 10 Feb 2016 21:20:16 GMT
Author: rohini
Date: Wed Feb 10 21:20:15 2016
New Revision: 1729739

URL: http://svn.apache.org/viewvc?rev=1729739&view=rev
Log:
PIG-4759: Fix Classresolution_1 e2e failure (rohini)

Added:
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld
    pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
    pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Feb 10 21:20:15 2016
@@ -89,6 +89,8 @@ PIG-4639: Add better parser for Apache H
 
 BUG FIXES
 
+PIG-4759: Fix Classresolution_1 e2e failure (rohini)
+
 PIG-4800: EvalFunc.getCacheFiles() fails for different namenode (rohini)
 
 PIG-4790: Join after union fail due to UnionOptimizer (rohini)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml Wed Feb 10 21:20:15 2016
@@ -493,7 +493,7 @@ Gtab = .... aggregation function
 STORE Gtab INTO '/user/vxj/finalresult2';
 </source>
 
-<p>To make the script works, add the exec statement.  </p>
+<p>To make the script work, add the exec statement. </p>
 
 <source>
 A = LOAD '/user/xxx/firstinput' USING PigStorage();
@@ -518,6 +518,11 @@ Ftab = group ....
 Gtab = .... aggregation function
 STORE Gtab INTO '/user/vxj/finalresult2';
 </source>
+
+<p>If the STORE and LOAD both had exact matching file paths, Pig will recognize the implicit dependency
+and launch two different mapreduce jobs/Tez DAGs with the second job depending on the output of the first one. 
+exec is not required to be specified in that case.</p>
+
 </section>
 </section>
 </section>

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java Wed Feb 10 21:20:15 2016
@@ -363,52 +363,18 @@ public class TezCompiler extends PhyPlan
                     String msg = "Predecessor of load should be a store or native oper. Got " + p.getClass();
                     throw new PlanException(msg, errCode, PigException.BUG);
                 }
-                if (p instanceof POStore) {
-                    PhysicalOperator store = oper.plan.getOperator(p.getOperatorKey());
-                    // replace POStore to POValueOutputTez, convert the tezOperator to splitter
-                    oper.plan.disconnect(oper.plan.getPredecessors(store).get(0), store);
-                    oper.plan.remove(store);
-                    POValueOutputTez valueOutput = new POValueOutputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
-                    oper.plan.addAsLeaf(valueOutput);
-                    oper.setSplitter(true);
-
-                    // Create a splittee of store only
-                    TezOperator storeOnlyTezOperator = getTezOp();
-                    PhysicalPlan storeOnlyPhyPlan = new PhysicalPlan();
-                    POValueInputTez valueInput = new POValueInputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
-                    valueInput.setInputKey(oper.getOperatorKey().toString());
-                    storeOnlyPhyPlan.addAsLeaf(valueInput);
-                    storeOnlyPhyPlan.addAsLeaf(store);
-                    storeOnlyTezOperator.plan = storeOnlyPhyPlan;
-                    tezPlan.add(storeOnlyTezOperator);
-                    phyToTezOpMap.put(p, storeOnlyTezOperator);
-
-                    // Create new operator as second splittee
-                    curTezOp = getTezOp();
-                    POValueInputTez valueInput2 = new POValueInputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
-                    valueInput2.setInputKey(oper.getOperatorKey().toString());
-                    curTezOp.plan.add(valueInput2);
-                    tezPlan.add(curTezOp);
-
-                    // Connect splitter to splittee
-                    TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, oper, storeOnlyTezOperator);
-                    TezCompilerUtil.configureValueOnlyTupleOutput(edge,  DataMovementType.ONE_TO_ONE);
-                    storeOnlyTezOperator.setRequestedParallelismByReference(oper);
-
-                    edge = TezCompilerUtil.connect(tezPlan, oper, curTezOp);
-                    TezCompilerUtil.configureValueOnlyTupleOutput(edge,  DataMovementType.ONE_TO_ONE);
-                    curTezOp.setRequestedParallelismByReference(oper);
-                } else if (p instanceof PONative) {
-                    // Need new operator
-                    curTezOp = getTezOp();
-                    curTezOp.plan.add(op);
-                    tezPlan.add(curTezOp);
-
-                    plan.disconnect(op, p);
-                    TezCompilerUtil.connect(tezPlan, oper, curTezOp);
-                    phyToTezOpMap.put(op, curTezOp);
-                    return;
+                curTezOp = getTezOp();
+                curTezOp.plan.add(op);
+                curTezOp.setUseMRMapSettings(true);
+                if (((POLoad) op).getLFile() != null
+                        && ((POLoad) op).getLFile().getFuncSpec() != null) {
+                        curTezOp.UDFs.add(((POLoad)op).getLFile().getFuncSpec().toString());
                 }
+                tezPlan.add(curTezOp);
+                phyToTezOpMap.put(op, curTezOp);
+                plan.disconnect(op, p);
+                TezCompilerUtil.connect(tezPlan, oper, curTezOp);
+                oper.segmentBelow = true;
                 return;
             }
 

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java Wed Feb 10 21:20:15 2016
@@ -90,6 +90,9 @@ public class TezPrinter extends TezOpPla
             printer.setVerbose(isVerbose);
             printer.visit();
             mStream.println();
+        } else if (!tezOper.isVertexGroup()) {
+            // For things like NativeTezOper
+            mStream.println("" + tezOper);
         }
     }
 

Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld?rev=1729739&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld Wed Feb 10 21:20:15 2016
@@ -0,0 +1,43 @@
+#--------------------------------------------------
+# There are 2 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-18
+
+Tez vertex scope-18
+# Plan on vertex
+a: Store(file:///tmp/output:org.apache.pig.builtin.PigStorage) - scope-8
+|
+|---a: New For Each(false,false)[bag] - scope-7
+    |   |
+    |   Cast[int] - scope-2
+    |   |
+    |   |---Project[bytearray][0] - scope-1
+    |   |
+    |   Cast[int] - scope-5
+    |   |
+    |   |---Project[bytearray][1] - scope-4
+    |
+    |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
+#--------------------------------------------------
+# TEZ DAG plan: pig-1_scope-1
+#--------------------------------------------------
+Tez vertex scope-19
+
+Tez vertex scope-19
+# Plan on vertex
+b: Store(file:///tmp/output1:org.apache.pig.builtin.PigStorage) - scope-17
+|
+|---b: New For Each(false,false)[bag] - scope-16
+    |   |
+    |   Cast[int] - scope-11
+    |   |
+    |   |---Project[bytearray][0] - scope-10
+    |   |
+    |   Cast[int] - scope-14
+    |   |
+    |   |---Project[bytearray][1] - scope-13
+    |
+    |---b: Load(file:///tmp/output:org.apache.pig.builtin.PigStorage) - scope-9

Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld?rev=1729739&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld Wed Feb 10 21:20:15 2016
@@ -0,0 +1,42 @@
+#--------------------------------------------------
+# There are 3 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-12
+
+Tez vertex scope-12
+# Plan on vertex
+a: Store(/tmp/table_testNativeMRJobSimple_input:org.apache.pig.builtin.PigStorage) - scope-8
+|
+|---a: New For Each(false,false)[bag] - scope-7
+    |   |
+    |   Cast[int] - scope-2
+    |   |
+    |   |---Project[bytearray][0] - scope-1
+    |   |
+    |   Cast[int] - scope-5
+    |   |
+    |   |---Project[bytearray][1] - scope-4
+    |
+    |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
+#--------------------------------------------------
+# TEZ DAG plan: pig-1_scope-1
+#--------------------------------------------------
+Tez vertex scope-13
+
+Tez vertex scope-13
+Tez - scope-13
+ Native Tez - jar : hadoop-examples.jar, params: [wordcount, /tmp/table_testNativeMRJobSimple_input, /tmp/table_testNativeMRJobSimple_output]:
+Plan Empty
+#--------------------------------------------------
+# TEZ DAG plan: pig-2_scope-2
+#--------------------------------------------------
+Tez vertex scope-14
+
+Tez vertex scope-14
+# Plan on vertex
+b: Store(file:///tmp/output:org.apache.pig.builtin.PigStorage) - scope-11
+|
+|---b: Load(/tmp/table_testNativeMRJobSimple_output:org.apache.pig.builtin.PigStorage) - scope-10

Modified: pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java (original)
+++ pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java Wed Feb 10 21:20:15 2016
@@ -89,6 +89,27 @@ public class TestTezCompiler {
     }
 
     @Test
+    public void testStoreLoad() throws Exception {
+        String query =
+                "a = load 'file:///tmp/input' as (x:int, y:int);" +
+                "store a into 'file:///tmp/output';" +
+                "b = load 'file:///tmp/output' as (x:int, y:int);" +
+                "store b into 'file:///tmp/output1';";
+
+        run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld");
+    }
+
+    @Test
+    public void testNative() throws Exception {
+        String query =
+                "a = load 'file:///tmp/input' as (x:int, y:int);" +
+                "b = native 'hadoop-examples.jar' Store a into '/tmp/table_testNativeMRJobSimple_input' Load '/tmp/table_testNativeMRJobSimple_output' `wordcount /tmp/table_testNativeMRJobSimple_input /tmp/table_testNativeMRJobSimple_output`;" +
+                "store b into 'file:///tmp/output';";
+
+        run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld");
+    }
+
+    @Test
     public void testFilter() throws Exception {
         String query =
                 "a = load 'file:///tmp/input' as (x:int, y:int);" +



Mime
View raw message