pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From knogu...@apache.org
Subject svn commit: r1796191 - in /pig/trunk: CHANGES.txt src/org/apache/pig/builtin/PigStorage.java test/org/apache/pig/test/TestPigStorage.java
Date Thu, 25 May 2017 20:15:14 GMT
Author: knoguchi
Date: Thu May 25 20:15:13 2017
New Revision: 1796191

URL: http://svn.apache.org/viewvc?rev=1796191&view=rev
Log:
PIG-5231: PigStorage with -schema may produce inconsistent outputs with more fields (knoguchi)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/builtin/PigStorage.java
    pig/trunk/test/org/apache/pig/test/TestPigStorage.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1796191&r1=1796190&r2=1796191&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu May 25 20:15:13 2017
@@ -101,6 +101,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5231: PigStorage with -schema may produce inconsistent outputs with more fields (knoguchi)
+
 PIG-5224: Extra foreach from ColumnPrune preventing Accumulator usage (knoguchi)
 
 PIG-5235: Typecast with as-clause fails for tuple/bag with an empty schema (knoguchi)

Modified: pig/trunk/src/org/apache/pig/builtin/PigStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/PigStorage.java?rev=1796191&r1=1796190&r2=1796191&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/PigStorage.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/PigStorage.java Thu May 25 20:15:13 2017
@@ -334,6 +334,18 @@ LoadPushDown, LoadMetadata, StoreMetadat
                     tupleIdx++;
                 }
             }
+            // If input record somehow has more fields than the provided schema
+            // drop the extra fields
+            if( tup.size() > fieldSchemas.length ) {
+                int lastindex = tup.size() - 1;
+                List<Object> list = tup.getAll();
+                for(int i = lastindex; i >= fieldSchemas.length ; i--) {
+                    list.remove(i);
+                }
+                // Tuple.getAll() may not return reference to the interal List
+                // so creating a new Tuple.
+                tup =  mTupleFactory.newTupleNoCopy(list);
+            }
         }
         return tup;
     }

Modified: pig/trunk/test/org/apache/pig/test/TestPigStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPigStorage.java?rev=1796191&r1=1796190&r2=1796191&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPigStorage.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPigStorage.java Thu May 25 20:15:13 2017
@@ -789,4 +789,35 @@ public class TestPigStorage  {
         pig.store("a", datadir + "aout", "PigStorage(',')");
     }
 
+    @Test
+    public void testPigStroageSchemaWithMultipleSchema() throws Exception {
+        pigContext.connect();
+        String query = "A = LOAD '" + datadir + "originput' using PigStorage(',') as (f1:chararray, f2:int);"
+                + "B = FOREACH A generate f1, f2, 3 as (f3:int);";
+        pig.registerQuery(query);
+        pig.store("A", datadir + "aout", "PigStorage('\\t', '-schema')");
+        pig.store("B", datadir + "bout", "PigStorage('\\t', '-schema')");
+
+        // We want to test the case when aout/.pig_schema is chosen for loading
+        // aout AND bout.
+        // Picking of schema is not deterministic given it's picked from a SET.
+        // For this test, we simply delete the other schema.
+        new File(datadir + "bout/.pig_schema" ).delete();
+
+        // Loading from 2 directories, each containing 2 fields and 3 fields
+        // respectively.
+        pig.registerQuery("C = LOAD '" + datadir + "aout," + datadir + "bout ' using PigStorage('\\t', '-schema');");
+        Schema a_schema = pig.dumpSchema("A");
+        Schema c_schema = pig.dumpSchema("C");
+        Assert.assertEquals("PigStorage schema should pick up the .pig_schema from A", a_schema, c_schema);
+        Iterator<Tuple> iter = pig.openIterator("C");
+        int counter = 0;
+        while (iter.hasNext()) {
+            Assert.assertEquals("All tuples should only contain 2 fields defined in schema",
+                                2, iter.next().size());
+            counter++;
+        }
+        Assert.assertEquals(20, counter);
+    }
+
 }



Mime
View raw message