incubator-hcatalog-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ga...@apache.org
Subject svn commit: r1367226 - in /incubator/hcatalog/branches/branch-0.4: CHANGES.txt src/java/org/apache/hcatalog/data/JsonSerDe.java src/test/org/apache/hcatalog/data/TestJsonSerDe.java
Date Mon, 30 Jul 2012 19:01:13 GMT
Author: gates
Date: Mon Jul 30 19:01:12 2012
New Revision: 1367226

URL: http://svn.apache.org/viewvc?rev=1367226&view=rev
Log:
HCATALOG-436 JSON SerDe column misnaming on CTAS

Modified:
    incubator/hcatalog/branches/branch-0.4/CHANGES.txt
    incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java
    incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java

Modified: incubator/hcatalog/branches/branch-0.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/CHANGES.txt?rev=1367226&r1=1367225&r2=1367226&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/CHANGES.txt (original)
+++ incubator/hcatalog/branches/branch-0.4/CHANGES.txt Mon Jul 30 19:01:12 2012
@@ -51,6 +51,8 @@ Trunk (unreleased changes)
   OPTIMIZATIONS
 
   BUG FIXES
+  HCAT-436 JSON SerDe column misnaming on CTAS (khorgath via gates)
+
   HCAT-449 HCatLoader is mistakenly identifying Configuration parameters to store (cdrome via traviscrawford)
 
   HCAT-452 HCat_Drop_Table_3 does not get initialized properly (cdrome via toffer)  

Modified: incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java?rev=1367226&r1=1367225&r2=1367226&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/data/JsonSerDe.java Mon Jul 30 19:01:12 2012
@@ -26,8 +26,11 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -167,10 +170,42 @@ public class JsonSerDe implements SerDe 
       throw new IOException("Field name expected");
     }
     String fieldName = p.getText();
-    int fpos = s.getPosition(fieldName);
+    int fpos;
+    try {
+      fpos = s.getPosition(fieldName);
+    } catch (NullPointerException npe){
+      fpos = getPositionFromHiveInternalColumnName(fieldName);
+      LOG.debug("NPE finding position for field [{}] in schema [{}]",fieldName,s);
+      if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))){
+        LOG.error("Hive internal column name {} and position "
+            +"encoding {} for the column name are at odds",fieldName,fpos);
+        throw npe;
+      }
+      if (fpos == -1){
+        return; // unknown field, we return.
+      }
+    }
     HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
+    Object currField = extractCurrentField(p, null, hcatFieldSchema,false);
+    r.set(fpos,currField);
+  }
 
-    r.set(fpos,extractCurrentField(p, null, hcatFieldSchema,false));
+  public String getHiveInternalColumnName(int fpos) {
+    return HiveConf.getColumnInternalName(fpos);
+  }
+
+  public int getPositionFromHiveInternalColumnName(String internalName) {
+//    return HiveConf.getPositionFromInternalName(fieldName);
+    // The above line should have been all the implementation that
+    // we need, but due to a bug in that impl which recognizes
+    // only single-digit columns, we need another impl here.
+    Pattern internalPattern = Pattern.compile("_col([0-9]+)");
+    Matcher m = internalPattern.matcher(internalName);
+    if (!m.matches()){
+      return -1;
+    } else {
+      return Integer.parseInt(m.group(1));
+    }
   }
 
   /**

Modified: incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java?rev=1367226&r1=1367225&r2=1367226&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/data/TestJsonSerDe.java Mon Jul 30 19:01:12 2012
@@ -26,6 +26,7 @@ import java.util.Properties;
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.io.Writable;
 import org.slf4j.Logger;
@@ -146,4 +147,67 @@ public class TestJsonSerDe extends TestC
 
   }
 
+  public void testRobustRead() throws Exception {
+    /**
+     *  This test has been added to account for HCATALOG-436
+     *  We write out columns with "internal column names" such
+     *  as "_col0", but try to read with regular column names.
+     */
+    
+    Configuration conf = new Configuration();
+
+    for (Pair<Properties,HCatRecord> e : getData()){
+      Properties tblProps = e.first;
+      HCatRecord r = e.second;
+      
+      Properties internalTblProps = new Properties();
+      for (Map.Entry pe : tblProps.entrySet()){
+        if (!pe.getKey().equals(Constants.LIST_COLUMNS)){
+          internalTblProps.put(pe.getKey(), pe.getValue());
+        } else {
+          internalTblProps.put(pe.getKey(),getInternalNames((String) pe.getValue()));
+        }
+      }
+      
+      LOG.info("orig tbl props:{}",tblProps);
+      LOG.info("modif tbl props:{}",internalTblProps);
+
+      JsonSerDe wjsd = new JsonSerDe();
+      wjsd.initialize(conf, internalTblProps);
+
+      JsonSerDe rjsd = new JsonSerDe();
+      rjsd.initialize(conf, tblProps);
+
+      LOG.info("ORIG:{}",r);
+
+      Writable s = wjsd.serialize(r,wjsd.getObjectInspector());
+      LOG.info("ONE:{}",s);
+
+      Object o1 = wjsd.deserialize(s);
+      LOG.info("deserialized ONE : {} ", o1);
+
+      Object o2 = rjsd.deserialize(s);
+      LOG.info("deserialized TWO : {} ", o2);
+      assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2));
+    }
+    
+  }
+  
+  String getInternalNames(String columnNames){
+    if (columnNames == null) { 
+      return null; 
+    }
+    if (columnNames.isEmpty()) { 
+      return ""; 
+    }
+    
+    StringBuffer sb = new StringBuffer();
+    int numStrings = columnNames.split(",").length;
+    sb.append("_col0");
+    for (int i = 1; i < numStrings ; i++ ){
+      sb.append(",");
+      sb.append(HiveConf.getColumnInternalName(i));
+    }
+    return sb.toString();
+  }
 }



Mime
View raw message