hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject hive git commit: HIVE-11112: ISO-8859-1 text output has fragments of previous longer rows appended (Yongzhi via Xuefu)
Date Mon, 29 Jun 2015 15:47:03 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 2912a2b56 -> e71fd213c


HIVE-11112: ISO-8859-1 text output has fragments of previous longer rows appended (Yongzhi via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e71fd213
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e71fd213
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e71fd213

Branch: refs/heads/branch-1
Commit: e71fd213c8925b715386e05c8c2bf62b565c9ab5
Parents: 2912a2b
Author: Xuefu Zhang <xzhang@Cloudera.com>
Authored: Mon Jun 29 08:45:49 2015 -0700
Committer: Xuefu Zhang <xzhang@Cloudera.com>
Committed: Mon Jun 29 08:46:34 2015 -0700

----------------------------------------------------------------------
 data/files/encoding_iso-8859-1.txt              |  4 +++
 .../queries/clientpositive/encoding_nonutf8.q   |  7 ++++
 .../clientpositive/encoding_nonutf8.q.out       | 36 ++++++++++++++++++++
 .../apache/hadoop/hive/serde2/SerDeUtils.java   |  2 +-
 4 files changed, 48 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/data/files/encoding_iso-8859-1.txt
----------------------------------------------------------------------
diff --git a/data/files/encoding_iso-8859-1.txt b/data/files/encoding_iso-8859-1.txt
new file mode 100644
index 0000000..7df6dc2
--- /dev/null
+++ b/data/files/encoding_iso-8859-1.txt
@@ -0,0 +1,4 @@
+Müller,Thomas
+Jørgensen,Jørgen
+Peña,Andrés
+Nåm,Fæk

http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/ql/src/test/queries/clientpositive/encoding_nonutf8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encoding_nonutf8.q b/ql/src/test/queries/clientpositive/encoding_nonutf8.q
new file mode 100644
index 0000000..f416760
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encoding_nonutf8.q
@@ -0,0 +1,7 @@
+drop table if exists encodelat1;
+create table encodelat1 (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1');
+load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1;
+select * from encodelat1;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/ql/src/test/results/clientpositive/encoding_nonutf8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encoding_nonutf8.q.out b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out
new file mode 100644
index 0000000..63c8b45
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out
@@ -0,0 +1,36 @@
+PREHOOK: query: drop table if exists encodelat1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists encodelat1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table encodelat1 (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encodelat1
+POSTHOOK: query: create table encodelat1 (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encodelat1
+PREHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@encodelat1
+POSTHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@encodelat1
+PREHOOK: query: select * from encodelat1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encodelat1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from encodelat1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encodelat1
+#### A masked pattern was here ####
+Müller,Thomas
+Jørgensen,Jørgen
+Peña,Andrés
+Nåm,Fæk

http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
index 8dada5a..40ede1a 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
@@ -551,7 +551,7 @@ public final class SerDeUtils {
   }
 
   public static Text transformTextToUTF8(Text text, Charset previousCharset) {
-    return new Text(new String(text.getBytes(), previousCharset));
+    return new Text(new String(text.getBytes(), 0, text.getLength(), previousCharset));
   }
 
   public static Text transformTextFromUTF8(Text text, Charset targetCharset) {


Mime
View raw message