hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From br...@apache.org
Subject svn commit: r1617869 - in /hive/trunk/serde: if/ src/gen/thrift/gen-cpp/ src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/ src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/ src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/ src/gen/thrift/g...
Date Thu, 14 Aug 2014 03:44:38 GMT
Author: brock
Date: Thu Aug 14 03:44:38 2014
New Revision: 1617869

URL: http://svn.apache.org/r1617869
Log:
HIVE-7142 - Hive multi serialization encoding support (Chengxiang Li via Brock)

Added:
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java
Modified:
    hive/trunk/serde/if/serde.thrift
    hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.cpp
    hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.h
    hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
    hive/trunk/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php
    hive/trunk/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py
    hive/trunk/serde/src/gen/thrift/gen-rb/serde_constants.rb
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java

Modified: hive/trunk/serde/if/serde.thrift
URL: http://svn.apache.org/viewvc/hive/trunk/serde/if/serde.thrift?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/if/serde.thrift (original)
+++ hive/trunk/serde/if/serde.thrift Thu Aug 14 03:44:38 2014
@@ -30,6 +30,7 @@ const string SERIALIZATION_NULL_FORMAT =
 const string SERIALIZATION_LAST_COLUMN_TAKES_REST = "serialization.last.column.takes.rest"
 const string SERIALIZATION_SORT_ORDER = "serialization.sort.order"
 const string SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object"
+const string SERIALIZATION_ENCODING = "serialization.encoding"
 
 const string FIELD_DELIM = "field.delim"
 const string COLLECTION_DELIM = "colelction.delim"

Modified: hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.cpp
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.cpp?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.cpp (original)
+++ hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.cpp Thu Aug 14 03:44:38 2014
@@ -27,6 +27,8 @@ serdeConstants::serdeConstants() {
 
   SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object";
 
+  SERIALIZATION_ENCODING = "serialization.encoding";
+
   FIELD_DELIM = "field.delim";
 
   COLLECTION_DELIM = "colelction.delim";

Modified: hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.h
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.h?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.h (original)
+++ hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.h Thu Aug 14 03:44:38 2014
@@ -23,6 +23,7 @@ class serdeConstants {
   std::string SERIALIZATION_LAST_COLUMN_TAKES_REST;
   std::string SERIALIZATION_SORT_ORDER;
   std::string SERIALIZATION_USE_JSON_OBJECTS;
+  std::string SERIALIZATION_ENCODING;
   std::string FIELD_DELIM;
   std::string COLLECTION_DELIM;
   std::string LINE_DELIM;

Modified: hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
(original)
+++ hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
Thu Aug 14 03:44:38 2014
@@ -49,6 +49,8 @@ public class serdeConstants {
 
   public static final String SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object";
 
+  public static final String SERIALIZATION_ENCODING = "serialization.encoding";
+
   public static final String FIELD_DELIM = "field.delim";
 
   public static final String COLLECTION_DELIM = "colelction.delim";

Modified: hive/trunk/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php (original)
+++ hive/trunk/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php Thu Aug
14 03:44:38 2014
@@ -32,6 +32,8 @@ $GLOBALS['serde_CONSTANTS']['SERIALIZATI
 
 $GLOBALS['serde_CONSTANTS']['SERIALIZATION_USE_JSON_OBJECTS'] = "serialization.use.json.object";
 
+$GLOBALS['serde_CONSTANTS']['SERIALIZATION_ENCODING'] = "serialization.encoding";
+
 $GLOBALS['serde_CONSTANTS']['FIELD_DELIM'] = "field.delim";
 
 $GLOBALS['serde_CONSTANTS']['COLLECTION_DELIM'] = "colelction.delim";

Modified: hive/trunk/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py (original)
+++ hive/trunk/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py Thu Aug
14 03:44:38 2014
@@ -17,6 +17,7 @@ SERIALIZATION_NULL_FORMAT = "serializati
 SERIALIZATION_LAST_COLUMN_TAKES_REST = "serialization.last.column.takes.rest"
 SERIALIZATION_SORT_ORDER = "serialization.sort.order"
 SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object"
+SERIALIZATION_ENCODING = "serialization.encoding"
 FIELD_DELIM = "field.delim"
 COLLECTION_DELIM = "colelction.delim"
 LINE_DELIM = "line.delim"

Modified: hive/trunk/serde/src/gen/thrift/gen-rb/serde_constants.rb
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-rb/serde_constants.rb?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-rb/serde_constants.rb (original)
+++ hive/trunk/serde/src/gen/thrift/gen-rb/serde_constants.rb Thu Aug 14 03:44:38 2014
@@ -23,6 +23,8 @@ SERIALIZATION_SORT_ORDER = %q"serializat
 
 SERIALIZATION_USE_JSON_OBJECTS = %q"serialization.use.json.object"
 
+SERIALIZATION_ENCODING = %q"serialization.encoding"
+
 FIELD_DELIM = %q"field.delim"
 
 COLLECTION_DELIM = %q"colelction.delim"

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java?rev=1617869&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java
(added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/AbstractEncodingAwareSerDe.java
Thu Aug 14 03:44:38 2014
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.nio.charset.Charset;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.base.Charsets;
+
+/**
+ * Base class for SerDes that honor the serialization.encoding table property
+ * (default UTF-8): rows are converted from UTF-8 to that charset after they are
+ * serialized, and from that charset to UTF-8 before they are deserialized.
+ */
+public abstract class AbstractEncodingAwareSerDe extends AbstractSerDe {
+
+  protected Charset charset;
+
+  @Override
+  @Deprecated
+  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+    final String encodingName = tbl.getProperty(serdeConstants.SERIALIZATION_ENCODING, "UTF-8");
+    this.charset = Charset.forName(encodingName);
+  }
+
+  @Override
+  public final Writable serialize(Object obj, ObjectInspector objInspector)
+      throws SerDeException {
+    final Writable utf8Row = doSerialize(obj, objInspector);
+    if (this.charset.equals(Charsets.UTF_8)) {
+      return utf8Row;
+    }
+    return transformFromUTF8(utf8Row);
+  }
+
+  /**
+   * Converts a row returned by doSerialize from UTF-8 to the configured charset.
+   * @param blob the serialized row, as returned by doSerialize
+   * @return the row converted to the configured charset
+   */
+  protected abstract Writable transformFromUTF8(Writable blob);
+
+  protected abstract Writable doSerialize(Object obj, ObjectInspector objInspector) throws SerDeException;
+
+  @Override
+  public final Object deserialize(Writable blob) throws SerDeException {
+    // Rows of non-UTF-8 tables are converted first, so doDeserialize only sees UTF-8.
+    final boolean storedAsUtf8 = this.charset.equals(Charsets.UTF_8);
+    final Writable utf8Blob = storedAsUtf8 ? blob : transformToUTF8(blob);
+    return doDeserialize(utf8Blob);
+  }
+
+  /**
+   * Converts a stored row from the configured charset to UTF-8 for doDeserialize.
+   * @param blob the stored row, in the configured charset
+   * @return the row converted to UTF-8
+   */
+  protected abstract Writable transformToUTF8(Writable blob);
+
+  protected abstract Object doDeserialize(Writable blob) throws SerDeException;
+}

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java Thu Aug
14 03:44:38 2014
@@ -46,7 +46,7 @@ public class DelimitedJSONSerDe extends 
    * Not implemented.
    */
   @Override
-  public Object deserialize(Writable field) throws SerDeException {
+  public Object doDeserialize(Writable field) throws SerDeException {
     LOG.error("DelimitedJSONSerDe cannot deserialize.");
     throw new SerDeException("DelimitedJSONSerDe cannot deserialize.");
   }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java Thu Aug 14 03:44:38
2014
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.serde2;
 
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -523,4 +524,12 @@ public final class SerDeUtils {
   private SerDeUtils() {
     // prevent instantiation
   }
+
+  public static Text transformTextToUTF8(Text text, Charset previousCharset) {
+    return new Text(new String(text.getBytes(), previousCharset));
+  }
+
+  public static Text transformTextFromUTF8(Text text, Charset targetCharset) {
+    return new Text(new String(text.getBytes()).getBytes(targetCharset));
+  }
 }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java?rev=1617869&r1=1617868&r2=1617869&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java Thu
Aug 14 03:44:38 2014
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.serde2.lazy;
 
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
@@ -29,11 +30,13 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.ByteStream;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -61,7 +64,7 @@ import org.apache.hadoop.io.Writable;
  * Also LazySimpleSerDe outputs typed columns instead of treating all columns as
  * String like MetadataTypedColumnsetSerDe.
  */
-public class LazySimpleSerDe extends AbstractSerDe {
+public class LazySimpleSerDe extends AbstractEncodingAwareSerDe {
 
   public static final Log LOG = LogFactory.getLog(LazySimpleSerDe.class
       .getName());
@@ -187,6 +190,8 @@ public class LazySimpleSerDe extends Abs
   public void initialize(Configuration job, Properties tbl)
       throws SerDeException {
 
+    super.initialize(job, tbl);
+
     serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, getClass()
         .getName());
 
@@ -330,7 +335,7 @@ public class LazySimpleSerDe extends Abs
    * @see SerDe#deserialize(Writable)
    */
   @Override
-  public Object deserialize(Writable field) throws SerDeException {
+  public Object doDeserialize(Writable field) throws SerDeException {
     if (byteArrayRef == null) {
       byteArrayRef = new ByteArrayRef();
     }
@@ -375,7 +380,7 @@ public class LazySimpleSerDe extends Abs
    * @see SerDe#serialize(Object, ObjectInspector)
    */
   @Override
-  public Writable serialize(Object obj, ObjectInspector objInspector)
+  public Writable doSerialize(Object obj, ObjectInspector objInspector)
       throws SerDeException {
 
     if (objInspector.getCategory() != Category.STRUCT) {
@@ -584,4 +589,16 @@ public class LazySimpleSerDe extends Abs
     return stats;
 
   }
+
+  @Override
+  protected Writable transformFromUTF8(Writable blob) {
+    // Treat the row as Text and convert it from UTF-8 to this.charset.
+    return SerDeUtils.transformTextFromUTF8((Text) blob, this.charset);
+  }
+
+  @Override
+  protected Writable transformToUTF8(Writable blob) {
+    // Treat the row as Text and convert it from this.charset to UTF-8.
+    return SerDeUtils.transformTextToUTF8((Text) blob, this.charset);
+  }
 }



Mime
View raw message