pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From the...@apache.org
Subject svn commit: r1149237 - in /pig/trunk: CHANGES.txt src/org/apache/pig/builtin/JsonMetadata.java test/org/apache/pig/test/TestLoad.java
Date Thu, 21 Jul 2011 15:50:31 GMT
Author: thejas
Date: Thu Jul 21 15:50:29 2011
New Revision: 1149237

URL: http://svn.apache.org/viewvc?rev=1149237&view=rev
Log:
PIG-2179: tests in TestLoad are failing

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/builtin/JsonMetadata.java
    pig/trunk/test/org/apache/pig/test/TestLoad.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1149237&r1=1149236&r2=1149237&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Jul 21 15:50:29 2011
@@ -89,6 +89,8 @@ PIG-2011: Speed up TestTypedMap.java (dv
 
 BUG FIXES
 
+PIG-2179: tests in TestLoad are failing (thejas)
+
 PIG-2146: POStore.getSchema() returns null because of which PigOutputCommitter 
  is not storing schema while cleanup (thejas)
 

Modified: pig/trunk/src/org/apache/pig/builtin/JsonMetadata.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/JsonMetadata.java?rev=1149237&r1=1149236&r2=1149237&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/JsonMetadata.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/JsonMetadata.java Thu Jul 21 15:50:29 2011
@@ -20,6 +20,8 @@ package org.apache.pig.builtin;
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -29,6 +31,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.pig.Expression;
+import org.apache.pig.LoadFunc;
 import org.apache.pig.LoadMetadata;
 import org.apache.pig.StoreMetadata;
 import org.apache.pig.ResourceSchema;
@@ -70,9 +73,10 @@ public class JsonMetadata implements Loa
     }
 
     /**.
-     * Given a path, which may represent a glob pattern, a directory, or a file, this method
-     * finds the set of relevant metadata files on the storage system. The algorithm for finding the
-     * metadata file is as follows:
+     * Given a path, which may represent a glob pattern, a directory, 
+     * comma separated files/glob patterns or a file, this method
+     * finds the set of relevant metadata files on the storage system. 
+     * The algorithm for finding the metadata file is as follows:
      * <p>
      * For each file represented by the path (either directly, or via a glob):
      *   If parentPath/prefix.fileName exists, use that as the metadata file.
@@ -90,38 +94,47 @@ public class JsonMetadata implements Loa
      */
     protected Set<ElementDescriptor> findMetaFile(String path, String prefix, Configuration conf)
         throws IOException {
-        DataStorage storage = new HDataStorage(ConfigurationUtil.toProperties(conf));
-        String fullPath = FileLocalizer.fullPath(path, storage);
         Set<ElementDescriptor> metaFileSet = new HashSet<ElementDescriptor>();
-        if(storage.isContainer(fullPath)) {
-            ElementDescriptor metaFilePath = storage.asElement(fullPath, prefix);
-            if (metaFilePath.exists()) {
-                metaFileSet.add(metaFilePath);
+        String[] locations = LoadFunc.getPathStrings(path);
+        for (String loc : locations) {
+            DataStorage storage;
+            try {
+                storage = new HDataStorage(new URI(loc), ConfigurationUtil.toProperties(conf));
+            } catch (URISyntaxException e) {
+                throw new IOException("Unable to read " + loc, e);
             }
-        } else {
-            ElementDescriptor[] descriptors = storage.asCollection(path);
-            for(ElementDescriptor descriptor : descriptors) {
-                String fileName = null, parentName = null;
-                ContainerDescriptor parentContainer = null;
-                if (descriptor instanceof HFile) {
-                    Path descriptorPath = ((HFile) descriptor).getPath();
-                    fileName = descriptorPath.getName();
-                    Path parent = descriptorPath.getParent();
-                    parentName = parent.toString();
-                    parentContainer = new HDirectory((HDataStorage)storage,parent);
-                }
-                ElementDescriptor metaFilePath = storage.asElement(parentName, prefix+"."+fileName);
+            String fullPath = FileLocalizer.fullPath(loc, storage);
 
-                // if the file has a custom schema, use it
+            if(storage.isContainer(fullPath)) {
+                ElementDescriptor metaFilePath = storage.asElement(fullPath, prefix);
                 if (metaFilePath.exists()) {
                     metaFileSet.add(metaFilePath);
-                    continue;
                 }
+            } else {
+                ElementDescriptor[] descriptors = storage.asCollection(loc);
+                for(ElementDescriptor descriptor : descriptors) {
+                    String fileName = null, parentName = null;
+                    ContainerDescriptor parentContainer = null;
+                    if (descriptor instanceof HFile) {
+                        Path descriptorPath = ((HFile) descriptor).getPath();
+                        fileName = descriptorPath.getName();
+                        Path parent = descriptorPath.getParent();
+                        parentName = parent.toString();
+                        parentContainer = new HDirectory((HDataStorage)storage,parent);
+                    }
+                    ElementDescriptor metaFilePath = storage.asElement(parentName, prefix+"."+fileName);
 
-                // if no custom schema, try the parent directory
-                metaFilePath = storage.asElement(parentContainer, prefix);
-                if (metaFilePath.exists()) {
-                    metaFileSet.add(metaFilePath);
+                    // if the file has a custom schema, use it
+                    if (metaFilePath.exists()) {
+                        metaFileSet.add(metaFilePath);
+                        continue;
+                    }
+
+                    // if no custom schema, try the parent directory
+                    metaFilePath = storage.asElement(parentContainer, prefix);
+                    if (metaFilePath.exists()) {
+                        metaFileSet.add(metaFilePath);
+                    }
                 }
             }
         }

Modified: pig/trunk/test/org/apache/pig/test/TestLoad.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestLoad.java?rev=1149237&r1=1149236&r2=1149237&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestLoad.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestLoad.java Thu Jul 21 15:50:29 2011
@@ -258,8 +258,8 @@ public class TestLoad extends junit.fram
     
     @Test
     public void testNonDfsLocation() throws Exception {
-        String nonDfsUrl = "har://hdfs-namenode/user/foo/";
-        String query = "a = load '" + nonDfsUrl + "';" +
+        String nonDfsUrl = "har:///user/foo/f.har";
+        String query = "a = load '" + nonDfsUrl + "' using PigStorage('\t','-noschema');" +
                        "store a into 'output';";
         LogicalPlan lp = Util.buildLp(servers[1], query);
         LOLoad load = (LOLoad) lp.getSources().get(0);



Mime
View raw message