pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cheol...@apache.org
Subject svn commit: r1605208 - in /pig/trunk: ./ conf/ src/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ test/org/apache/pig/test/
Date Tue, 24 Jun 2014 21:50:52 GMT
Author: cheolsoo
Date: Tue Jun 24 21:50:52 2014
New Revision: 1605208

URL: http://svn.apache.org/r1605208
Log:
PIG-4003: Error is thrown by JobStats.getOutputSize() when storing to a Hive table (cheolsoo)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/conf/pig.properties
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/FileBasedOutputSizeReader.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigStatsOutputSizeReader.java
    pig/trunk/src/pig-default.properties
    pig/trunk/test/org/apache/pig/test/TestMRJobStats.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1605208&r1=1605207&r2=1605208&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jun 24 21:50:52 2014
@@ -40,6 +40,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-4003: Error is thrown by JobStats.getOutputSize() when storing to a Hive table (cheolsoo)
+
 PIG-4035: Fix CollectedGroup e2e tests for tez (daijy)
 
 PIG-4034: Exclude TestTezAutoParallelism when -Dhadoopversion=20 (cheolsoo)

Modified: pig/trunk/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/trunk/conf/pig.properties?rev=1605208&r1=1605207&r2=1605208&view=diff
==============================================================================
--- pig/trunk/conf/pig.properties (original)
+++ pig/trunk/conf/pig.properties Tue Jun 24 21:50:52 2014
@@ -454,6 +454,7 @@
 # first one whose supports() method returns true will be used.
 #
 # pig.stats.output.size.reader=<fully qualified class name of a PigStatsOutputSizeReader implementation>
+# pig.stats.output.size.reader.unsupported=<comma separated list of StoreFuncs that are not supported by this reader>
 
 #
 # Override hadoop configs programatically

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/FileBasedOutputSizeReader.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/FileBasedOutputSizeReader.java?rev=1605208&r1=1605207&r2=1605208&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/FileBasedOutputSizeReader.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/FileBasedOutputSizeReader.java Tue Jun 24 21:50:52 2014
@@ -43,6 +43,18 @@ public class FileBasedOutputSizeReader i
      */
     @Override
     public boolean supports(POStore sto, Configuration conf) {
+        String storeFuncName = sto.getStoreFunc().getClass().getCanonicalName();
+        // Some store functions do not support file-based output reader (e.g.
+        // HCatStorer), so they should be excluded.
+        String unsupported = conf.get(
+                PigStatsOutputSizeReader.OUTPUT_SIZE_READER_UNSUPPORTED);
+        if (unsupported != null) {
+            for (String s : unsupported.split(",")) {
+                if (s.equalsIgnoreCase(storeFuncName)) {
+                    return false;
+                }
+            }
+        }
         return UriUtil.isHDFSFileOrLocalOrS3N(getLocationUri(sto), conf);
     }
 
@@ -54,8 +66,8 @@ public class FileBasedOutputSizeReader i
     @Override
     public long getOutputSize(POStore sto, Configuration conf) throws IOException {
         if (!supports(sto, conf)) {
-            log.warn("'" + sto.getStoreFunc().getClass().getName()
-                    + "' is not supported by " + getClass().getName());
+            log.warn("'" + sto.getStoreFunc().getClass().getCanonicalName()
+                    + "' is not supported by " + getClass().getCanonicalName());
             return -1;
         }
 

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigStatsOutputSizeReader.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigStatsOutputSizeReader.java?rev=1605208&r1=1605207&r2=1605208&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigStatsOutputSizeReader.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigStatsOutputSizeReader.java Tue Jun 24 21:50:52 2014
@@ -37,6 +37,7 @@ import org.apache.pig.classification.Int
 public interface PigStatsOutputSizeReader {
 
     static final String OUTPUT_SIZE_READER_KEY = "pig.stats.output.size.reader";
+    static final String OUTPUT_SIZE_READER_UNSUPPORTED = "pig.stats.output.size.reader.unsupported";
 
     /**
      * Returns whether the given PSStore is supported by this output size reader

Modified: pig/trunk/src/pig-default.properties
URL: http://svn.apache.org/viewvc/pig/trunk/src/pig-default.properties?rev=1605208&r1=1605207&r2=1605208&view=diff
==============================================================================
--- pig/trunk/src/pig-default.properties (original)
+++ pig/trunk/src/pig-default.properties Tue Jun 24 21:50:52 2014
@@ -55,3 +55,6 @@ pig.disable.counter=false
 pig.sql.type=hcat
 
 pig.output.committer.recovery.support=false
+
+pig.stats.output.size.reader=org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.FileBasedOutputSizeReader
+pig.stats.output.size.reader.unsupported=org.apache.hcatalog.pig.HCatStorer,org.apache.hive.hcatalog.pig.HCatStorer

Modified: pig/trunk/test/org/apache/pig/test/TestMRJobStats.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestMRJobStats.java?rev=1605208&r1=1605207&r2=1605208&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestMRJobStats.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestMRJobStats.java Tue Jun 24 21:50:52 2014
@@ -301,4 +301,27 @@ public class TestMRJobStats {
         assertEquals("The dummy output size reader always returns " + DummyOutputSizeReader.SIZE,
                 DummyOutputSizeReader.SIZE, outputSize);
     }
+
+    @Test
+    public void testGetOuputSizeUsingNonFileBasedStorage5() throws Exception {
+        Configuration conf = new Configuration();
+
+        long size = 2L * 1024 * 1024 * 1024;
+        long outputSize = JobStats.getOutputSize(
+                createPOStoreForFileBasedSystem(size, new PigStorageWithStatistics(), conf), conf);
+
+        // By default, FileBasedOutputSizeReader is used to compute the size of output.
+        assertEquals("The returned output size is expected to be the same as the file size",
+                size, outputSize);
+
+        // Now add PigStorageWithStatistics to the unsupported store funcs list, and
+        // verify that JobStats.getOutputSize() returns -1.
+        conf.set(PigStatsOutputSizeReader.OUTPUT_SIZE_READER_UNSUPPORTED,
+                PigStorageWithStatistics.class.getName());
+
+        outputSize = JobStats.getOutputSize(
+                createPOStoreForFileBasedSystem(size, new PigStorageWithStatistics(), conf), conf);
+        assertEquals("The default output size reader returns -1 for unsupported store funcs",
+                -1, outputSize);
+    }
 }



Mime
View raw message