Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2431F18B2C for ; Thu, 12 Nov 2015 03:59:31 +0000 (UTC) Received: (qmail 75759 invoked by uid 500); 12 Nov 2015 03:59:27 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 75643 invoked by uid 500); 12 Nov 2015 03:59:27 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 73520 invoked by uid 99); 12 Nov 2015 03:59:26 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 12 Nov 2015 03:59:26 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id C3DADE570C; Thu, 12 Nov 2015 03:59:25 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: xuefu@apache.org To: commits@hive.apache.org Date: Thu, 12 Nov 2015 03:59:44 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [20/55] [abbrv] hive git commit: HIVE-12164 : non-ascii characters shows improper with insert into (Aleksei Statkevich via Xuefu Zhang) HIVE-12164 : non-ascii characters shows improper with insert into (Aleksei Statkevich via Xuefu Zhang) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d06b69f5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d06b69f5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d06b69f5 Branch: refs/heads/spark Commit: d06b69f57624cd6b6bfafd8e28512b6e8ae03b6a Parents: 95fcdb5 Author: Aleksei Statkevich Authored: Mon Oct 19 22:37:00 2015 -0800 Committer: Ashutosh Chauhan Committed: Thu Nov 5 13:54:53 2015 -0800 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 16 ++++++++--- .../clientpositive/insert_values_nonascii.q | 9 +++++++ .../clientpositive/insert_values_nonascii.q.out | 28 ++++++++++++++++++++ 3 files changed, 50 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d06b69f5/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index f3d7057..f7e2039 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -216,6 +216,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.security.UserGroupInformation; @@ -733,6 +734,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } /** + * Convert a string to Text format and write its bytes in the same way TextOutputFormat would do. + * This is needed to properly encode non-ascii characters. + */ + private static void writeAsText(String text, FSDataOutputStream out) throws IOException { + Text to = new Text(text); + out.write(to.getBytes(), 0, to.getLength()); + } + + /** * Generate a temp table out of a value clause * See also {@link #preProcessForInsert(ASTNode, QB)} */ @@ -810,10 +820,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", "")); } if (isFirst) isFirst = false; - else out.writeBytes("\u0001"); - out.writeBytes(unparseExprForValuesClause(value)); + else writeAsText("\u0001", out); + writeAsText(unparseExprForValuesClause(value), out); } - out.writeBytes("\n"); + writeAsText("\n", out); firstRow = false; } out.close(); http://git-wip-us.apache.org/repos/asf/hive/blob/d06b69f5/ql/src/test/queries/clientpositive/insert_values_nonascii.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/insert_values_nonascii.q b/ql/src/test/queries/clientpositive/insert_values_nonascii.q new file mode 100644 index 0000000..2e4ef41 --- /dev/null +++ b/ql/src/test/queries/clientpositive/insert_values_nonascii.q @@ -0,0 +1,9 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; + +create table insert_values_nonascii(t1 char(32), t2 string); + +insert into insert_values_nonascii values("Абвгде Garçu 谢谢", "Kôkaku ありがとう"), ("ございます", "kidôtai한국어"); + +select * from insert_values_nonascii; http://git-wip-us.apache.org/repos/asf/hive/blob/d06b69f5/ql/src/test/results/clientpositive/insert_values_nonascii.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/insert_values_nonascii.q.out b/ql/src/test/results/clientpositive/insert_values_nonascii.q.out new file mode 100644 index 0000000..ca07bef --- /dev/null +++ b/ql/src/test/results/clientpositive/insert_values_nonascii.q.out @@ -0,0 +1,28 @@ +PREHOOK: query: create table insert_values_nonascii(t1 char(32), t2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@insert_values_nonascii +POSTHOOK: query: create table insert_values_nonascii(t1 char(32), t2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@insert_values_nonascii +PREHOOK: query: insert into insert_values_nonascii values("Абвгде Garçu 谢谢", "Kôkaku ありがとう"), ("ございます", "kidôtai한국어") +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@insert_values_nonascii +POSTHOOK: query: insert into insert_values_nonascii values("Абвгде Garçu 谢谢", "Kôkaku ありがとう"), ("ございます", "kidôtai한국어") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@insert_values_nonascii +POSTHOOK: Lineage: insert_values_nonascii.t1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: insert_values_nonascii.t2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select * from insert_values_nonascii +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_values_nonascii +#### A masked pattern was here #### +POSTHOOK: query: select * from insert_values_nonascii +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_values_nonascii +#### A masked pattern was here #### +Абвгде Garçu 谢谢 Kôkaku ありがとう +ございます kidôtai한국어