incubator-hcatalog-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tra...@apache.org
Subject svn commit: r1363122 - in /incubator/hcatalog/trunk: ./ hcatalog-pig-adapter/ hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/ hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/ src/java/org/apache/hcatalog/common/
Date Wed, 18 Jul 2012 21:21:24 GMT
Author: travis
Date: Wed Jul 18 21:21:24 2012
New Revision: 1363122

URL: http://svn.apache.org/viewvc?rev=1363122&view=rev
Log:
HCATALOG-440 : pig field names for arrays should be configurable

Added:
    incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
Modified:
    incubator/hcatalog/trunk/CHANGES.txt
    incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml
    incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java

Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Wed Jul 18 21:21:24 2012
@@ -26,6 +26,8 @@ Trunk (unreleased changes)
   HCAT-328 HCatLoader should report its input size so pig can estimate the number of reducers (traviscrawford via gates)
 
   IMPROVEMENTS
+  HCAT-440 pig field names for arrays should be configurable (traviscrawford)
+
   HCAT-434 Package HCatalog pig support as a separate jar (traviscrawford)
 
   HCAT-341 InitializeInput improvements (traviscrawford)

Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/ivy.xml Wed Jul 18 21:21:24 2012
@@ -38,7 +38,11 @@
     <dependency org="org.slf4j" name="slf4j-log4j12" rev="${slf4j.version}"/>
 
     <!-- Test dependencies -->
+    <dependency org="org.apache.hive" name="hive-builtins"
+      rev="${hive.version}" conf="test->default"/>
     <dependency org="org.apache.hive" name="hive-cli"
       rev="${hive.version}" conf="test->default"/>
+    <dependency org="org.apache.commons" name="commons-compress"
+      rev="${commons-compress.version}" conf="test->default"/>
   </dependencies>
 </ivy-module>

Modified: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java (original)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java Wed Jul 18 21:21:24 2012
@@ -194,14 +194,27 @@ public class PigHCatUtil {
     return rfSchema;
   }
 
-  private static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
+  protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
     // there are two cases - array<Type> and array<struct<...>>
     // in either case the element type of the array is represented in a
     // tuple field schema in the bag's field schema - the second case (struct)
     // more naturally translates to the tuple - in the first case (array<Type>)
     // we simulate the tuple by putting the single field in a tuple
+
+    Properties props = UDFContext.getUDFContext().getClientSystemProps();
+    String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
+    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
+      innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)
+          .replaceAll("FIELDNAME", hfs.getName());
+    }
+    String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
+    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
+      innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
+          .replaceAll("FIELDNAME", hfs.getName());
+    }
+
     ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
-    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
+    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
       .setDescription("The tuple in the bag")
       .setType(DataType.TUPLE);
     HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
@@ -214,7 +227,7 @@ public class PigHCatUtil {
       bagSubFieldSchemas[0].setSchema(s);
     } else {
       ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
-      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield")
+      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
         .setDescription("The inner field in the tuple in the bag")
         .setType(getPigType(arrayElementFieldSchema))
         .setSchema(null); // the element type is not a tuple - so no subschema

Added: incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java?rev=1363122&view=auto
==============================================================================
--- incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java (added)
+++ incubator/hcatalog/trunk/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java Wed Jul 18 21:21:24 2012
@@ -0,0 +1,72 @@
+package org.apache.hcatalog.pig;
+
+import com.google.common.collect.Lists;
+import junit.framework.Assert;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.data.DataType;
+import org.apache.pig.impl.util.UDFContext;
+import org.junit.Test;
+
+public class TestPigHCatUtil {
+
+  @Test
+  public void testGetBagSubSchema() throws Exception {
+
+    // Define the expected schema.
+    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
+    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
+        .setDescription("The tuple in the bag").setType(DataType.TUPLE);
+
+    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
+    innerTupleFieldSchemas[0] =
+        new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);
+
+    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
+    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
+
+    // Get the actual converted schema.
+    HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList(
+        new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
+    HCatFieldSchema hCatFieldSchema =
+        new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null);
+    ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema);
+
+    Assert.assertEquals(expected.toString(), actual.toString());
+  }
+
+  @Test
+  public void testGetBagSubSchemaConfigured() throws Exception {
+
+    // NOTE: pig-0.8 sets client system properties by actually getting the client
+    // system properties. Starting in pig-0.9 you must pass the properties in.
+    // When updating our pig dependency this will need updated.
+    System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t");
+    System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
+    UDFContext.getUDFContext().setClientSystemProps();
+
+    // Define the expected schema.
+    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
+    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t")
+        .setDescription("The tuple in the bag").setType(DataType.TUPLE);
+
+    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
+    innerTupleFieldSchemas[0] =
+        new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);
+
+    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
+    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
+
+    // Get the actual converted schema.
+    HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList(
+        new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
+    HCatFieldSchema actualHCatFieldSchema =
+        new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null);
+    ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema);
+
+    Assert.assertEquals(expected.toString(), actual.toString());
+  }
+}

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java?rev=1363122&r1=1363121&r2=1363122&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/common/HCatConstants.java Wed Jul 18 21:21:24 2012
@@ -38,6 +38,10 @@ public final class HCatConstants {
   public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter";
   public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ",";
   public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set" ;
+  public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name";
+  public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple";
+  public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name";
+  public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield";
 
   //The keys used to store info into the job Configuration
   public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat";



Mime
View raw message