Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D5D4618668 for ; Mon, 29 Jun 2015 15:47:03 +0000 (UTC) Received: (qmail 28466 invoked by uid 500); 29 Jun 2015 15:47:03 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 28422 invoked by uid 500); 29 Jun 2015 15:47:03 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 28411 invoked by uid 99); 29 Jun 2015 15:47:03 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 29 Jun 2015 15:47:03 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 84C4CE33CA; Mon, 29 Jun 2015 15:47:03 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: xuefu@apache.org To: commits@hive.apache.org Message-Id: <3306b1f9eeb040deb097c192031b0170@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-11112: ISO-8859-1 text output has fragments of previous longer rows appended (Yongzhi via Xuefu) Date: Mon, 29 Jun 2015 15:47:03 +0000 (UTC) Repository: hive Updated Branches: refs/heads/branch-1 2912a2b56 -> e71fd213c HIVE-11112: ISO-8859-1 text output has fragments of previous longer rows appended (Yongzhi via Xuefu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e71fd213 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e71fd213 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e71fd213 Branch: refs/heads/branch-1 Commit: 
e71fd213c8925b715386e05c8c2bf62b565c9ab5 Parents: 2912a2b Author: Xuefu Zhang Authored: Mon Jun 29 08:45:49 2015 -0700 Committer: Xuefu Zhang Committed: Mon Jun 29 08:46:34 2015 -0700 ---------------------------------------------------------------------- data/files/encoding_iso-8859-1.txt | 4 +++ .../queries/clientpositive/encoding_nonutf8.q | 7 ++++ .../clientpositive/encoding_nonutf8.q.out | 36 ++++++++++++++++++++ .../apache/hadoop/hive/serde2/SerDeUtils.java | 2 +- 4 files changed, 48 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/data/files/encoding_iso-8859-1.txt ---------------------------------------------------------------------- diff --git a/data/files/encoding_iso-8859-1.txt b/data/files/encoding_iso-8859-1.txt new file mode 100644 index 0000000..7df6dc2 --- /dev/null +++ b/data/files/encoding_iso-8859-1.txt @@ -0,0 +1,4 @@ +Müller,Thomas +Jørgensen,Jørgen +Peña,Andrés +Nåm,Fæk http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/ql/src/test/queries/clientpositive/encoding_nonutf8.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/encoding_nonutf8.q b/ql/src/test/queries/clientpositive/encoding_nonutf8.q new file mode 100644 index 0000000..f416760 --- /dev/null +++ b/ql/src/test/queries/clientpositive/encoding_nonutf8.q @@ -0,0 +1,7 @@ +drop table if exists encodelat1; +create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1'); +load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1; +select * from encodelat1; + http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/ql/src/test/results/clientpositive/encoding_nonutf8.q.out ---------------------------------------------------------------------- diff --git 
a/ql/src/test/results/clientpositive/encoding_nonutf8.q.out b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out new file mode 100644 index 0000000..63c8b45 --- /dev/null +++ b/ql/src/test/results/clientpositive/encoding_nonutf8.q.out @@ -0,0 +1,36 @@ +PREHOOK: query: drop table if exists encodelat1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists encodelat1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@encodelat1 +POSTHOOK: query: create table encodelat1 (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encodelat1 +PREHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@encodelat1 +POSTHOOK: query: load data local inpath '../../data/files/encoding_iso-8859-1.txt' overwrite into table encodelat1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@encodelat1 +PREHOOK: query: select * from encodelat1 +PREHOOK: type: QUERY +PREHOOK: Input: default@encodelat1 +#### A masked pattern was here #### +POSTHOOK: query: select * from encodelat1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encodelat1 +#### A masked pattern was here #### +Müller,Thomas +Jørgensen,Jørgen +Peña,Andrés +Nåm,Fæk http://git-wip-us.apache.org/repos/asf/hive/blob/e71fd213/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java ---------------------------------------------------------------------- diff --git 
a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index 8dada5a..40ede1a 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -551,7 +551,7 @@ public final class SerDeUtils { } public static Text transformTextToUTF8(Text text, Charset previousCharset) { - return new Text(new String(text.getBytes(), previousCharset)); + return new Text(new String(text.getBytes(), 0, text.getLength(), previousCharset)); } public static Text transformTextFromUTF8(Text text, Charset targetCharset) {