Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id E04D2186E5 for ; Tue, 30 Jun 2015 01:21:05 +0000 (UTC) Received: (qmail 37285 invoked by uid 500); 30 Jun 2015 01:20:57 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 37168 invoked by uid 500); 30 Jun 2015 01:20:57 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 33631 invoked by uid 99); 30 Jun 2015 01:20:55 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 30 Jun 2015 01:20:55 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 3C9C9E360A; Tue, 30 Jun 2015 01:20:55 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: sershe@apache.org To: commits@hive.apache.org Date: Tue, 30 Jun 2015 01:21:48 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [55/57] [abbrv] hive git commit: HIVE-11112: ISO-8859-1 text output has fragments of previous longer rows appended (Yongzhi via Xuefu) HIVE-11112: ISO-8859-1 text output has fragments of previous longer rows appended (Yongzhi via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/88482c3f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/88482c3f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/88482c3f Branch: refs/heads/llap Commit: 88482c3f74b9aeb3fec5f39851dce3e4cf3fa8a0 Parents: 2272443 Author: Xuefu Zhang Authored: Mon Jun 29 08:45:49 2015 -0700 
Committer: Xuefu Zhang Committed: Mon Jun 29 08:45:49 2015 -0700 ---------------------------------------------------------------------- data/files/encoding_iso-8859-1.txt | 4 +++ .../queries/clientpositive/encoding_nonutf8.q | 7 ++++ .../clientpositive/encoding_nonutf8.q.out | 36 ++++++++++++++++++++ .../apache/hadoop/hive/serde2/SerDeUtils.java | 2 +- 4 files changed, 48 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/88482c3f/data/files/encoding_iso-8859-1.txt ---------------------------------------------------------------------- diff --git a/data/files/encoding_iso-8859-1.txt b/data/files/encoding_iso-8859-1.txt new file mode 100644 index 0000000..7df6dc2 --- /dev/null +++ b/data/files/encoding_iso-8859-1.txt @@ -0,0 +1,4 @@ +Müller,Thomas +Jørgensen,Jørgen +Peña,Andrés +Nåm,Fæk http://git-wip-us.apache.org/repos/asf/hive/blob/88482c3f/ql/src/test/queries/clientpositive/encoding_nonutf8.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/encoding_nonutf8.q b/ql/src/test/queries/clientpositive/encoding_nonutf8.q new file mode 100644 index 0000000..f416760 --- /dev/null +++ b/ql/src/test/queries/clientpositive/encoding_nonutf8.q @@ -0,0 +1,7 @@ +drop table if exists encodelat1; +create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1'); +load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1; +select * from encodelat1; + http://git-wip-us.apache.org/repos/asf/hive/blob/88482c3f/ql/src/test/results/clientpositive/encoding_nonutf8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/encoding_nonutf8.q.out b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out new file 
mode 100644 index 0000000..63c8b45 --- /dev/null +++ b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out @@ -0,0 +1,36 @@ +PREHOOK: query: drop table if exists encodelat1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists encodelat1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@encodelat1 +POSTHOOK: query: create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encodelat1 +PREHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@encodelat1 +POSTHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@encodelat1 +PREHOOK: query: select * from encodelat1 +PREHOOK: type: QUERY +PREHOOK: Input: default@encodelat1 +#### A masked pattern was here #### +POSTHOOK: query: select * from encodelat1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encodelat1 +#### A masked pattern was here #### +Müller,Thomas +Jørgensen,Jørgen +Peña,Andrés +Nåm,Fæk http://git-wip-us.apache.org/repos/asf/hive/blob/88482c3f/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index 8dada5a..40ede1a 100644 --- 
a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -551,7 +551,7 @@ public final class SerDeUtils { } public static Text transformTextToUTF8(Text text, Charset previousCharset) { - return new Text(new String(text.getBytes(), previousCharset)); + return new Text(new String(text.getBytes(), 0, text.getLength(), previousCharset)); } public static Text transformTextFromUTF8(Text text, Charset targetCharset) {