hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject [20/55] [abbrv] hive git commit: HIVE-12164 : non-ascii characters shows improper with insert into (Aleksei Statkevich via Xuefu Zhang)
Date Thu, 12 Nov 2015 03:59:44 GMT
HIVE-12164 : non-ascii characters shows improper with insert into (Aleksei Statkevich via Xuefu Zhang)

Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d06b69f5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d06b69f5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d06b69f5

Branch: refs/heads/spark
Commit: d06b69f57624cd6b6bfafd8e28512b6e8ae03b6a
Parents: 95fcdb5
Author: Aleksei Statkevich <me.aleksei@gmail.com>
Authored: Mon Oct 19 22:37:00 2015 -0800
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Thu Nov 5 13:54:53 2015 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 16 ++++++++---
 .../clientpositive/insert_values_nonascii.q     |  9 +++++++
 .../clientpositive/insert_values_nonascii.q.out | 28 ++++++++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d06b69f5/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f3d7057..f7e2039 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -216,6 +216,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.Utils;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -733,6 +734,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   }
 
   /**
+   * Convert a string to Text format and write its bytes in the same way TextOutputFormat would do.
+   * This is needed to properly encode non-ascii characters.
+   */
+  private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
+    Text to = new Text(text);
+    out.write(to.getBytes(), 0, to.getLength());
+  }
+
+  /**
    * Generate a temp table out of a value clause
    * See also {@link #preProcessForInsert(ASTNode, QB)}
    */
@@ -810,10 +820,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
             fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
           }
           if (isFirst) isFirst = false;
-          else out.writeBytes("\u0001");
-          out.writeBytes(unparseExprForValuesClause(value));
+          else writeAsText("\u0001", out);
+          writeAsText(unparseExprForValuesClause(value), out);
         }
-        out.writeBytes("\n");
+        writeAsText("\n", out);
         firstRow = false;
       }
       out.close();

http://git-wip-us.apache.org/repos/asf/hive/blob/d06b69f5/ql/src/test/queries/clientpositive/insert_values_nonascii.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insert_values_nonascii.q b/ql/src/test/queries/clientpositive/insert_values_nonascii.q
new file mode 100644
index 0000000..2e4ef41
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insert_values_nonascii.q
@@ -0,0 +1,9 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+
+create table insert_values_nonascii(t1 char(32), t2 string);
+
+insert into insert_values_nonascii values("Абвгде Garçu 谢谢",  "Kôkaku ありがとう"), ("ございます", "kidôtai한국어");
+
+select * from insert_values_nonascii;

http://git-wip-us.apache.org/repos/asf/hive/blob/d06b69f5/ql/src/test/results/clientpositive/insert_values_nonascii.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_values_nonascii.q.out b/ql/src/test/results/clientpositive/insert_values_nonascii.q.out
new file mode 100644
index 0000000..ca07bef
--- /dev/null
+++ b/ql/src/test/results/clientpositive/insert_values_nonascii.q.out
@@ -0,0 +1,28 @@
+PREHOOK: query: create table insert_values_nonascii(t1 char(32), t2 string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_values_nonascii
+POSTHOOK: query: create table insert_values_nonascii(t1 char(32), t2 string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_values_nonascii
+PREHOOK: query: insert into insert_values_nonascii values("Абвгде Garçu 谢谢",  "Kôkaku ありがとう"), ("ございます", "kidôtai한국어")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@insert_values_nonascii
+POSTHOOK: query: insert into insert_values_nonascii values("Абвгде Garçu 谢谢",  "Kôkaku ありがとう"), ("ございます", "kidôtai한국어")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@insert_values_nonascii
+POSTHOOK: Lineage: insert_values_nonascii.t1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: insert_values_nonascii.t2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select * from insert_values_nonascii
+PREHOOK: type: QUERY
+PREHOOK: Input: default@insert_values_nonascii
+#### A masked pattern was here ####
+POSTHOOK: query: select * from insert_values_nonascii
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@insert_values_nonascii
+#### A masked pattern was here ####
+Абвгде Garçu 谢谢                 	Kôkaku ありがとう
+ございます                           	kidôtai한국어


Mime
View raw message