trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hzel...@apache.org
Subject [1/9] incubator-trafodion git commit: for jira 1720, add support to convert gbk into utf8
Date Mon, 01 Feb 2016 21:49:33 GMT
Repository: incubator-trafodion
Updated Branches:
  refs/heads/master aa3deffb1 -> 9cc1e835f


for jira 1720, add support to convert gbk into utf8


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/d51d2016
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/d51d2016
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/d51d2016

Branch: refs/heads/master
Commit: d51d2016d29725e3d863b351b3a548ee1b325c0c
Parents: 141f354
Author: Cloud User <centos@ming01.novalocal>
Authored: Sat Jan 9 11:43:21 2016 +0000
Committer: Cloud User <centos@ming01.novalocal>
Committed: Sat Jan 9 11:43:21 2016 +0000

----------------------------------------------------------------------
 core/sql/common/csconvert.cpp       | 42 ++++++++++++++++++++++++++++++++
 core/sql/common/csconvert.h         |  4 +++
 core/sql/exp/exp_clause_derived.h   |  4 ++-
 core/sql/exp/exp_conv.cpp           | 26 ++++++++++++++++++++
 core/sql/generator/GenItemFunc.cpp  |  3 +++
 core/sql/optimizer/ItemExpr.cpp     |  2 ++
 core/sql/optimizer/ItemFunc.h       |  1 +
 core/sql/optimizer/SynthType.cpp    | 12 +++++++++
 core/sql/sqlcomp/DefaultConstants.h |  3 +++
 core/sql/sqlcomp/nadefaults.cpp     |  2 ++
 10 files changed, 98 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp
index ffc5370..b0d21af 100644
--- a/core/sql/common/csconvert.cpp
+++ b/core/sql/common/csconvert.cpp
@@ -30,6 +30,10 @@
 //       but also used by the ODBC build and maybe others.
 
 #include <limits.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <stdlib.h>
+
 #include "multi-byte.h"
 #include "fcconv.h"
 #include "csconvert.h"
@@ -1275,3 +1279,41 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer )
      rtnv-- ;
   return rtnv ;
 }
+/* Convert inlen bytes at inbuf from from_charset to to_charset using glibc iconv, writing at most
+   outlen bytes to outbuf.  Returns the number of bytes written to outbuf, or -1 on any failure. */
+int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen)
+{
+  const size_t outCapacity = outlen;  // iconv() counts outlen down to the space left unused
+
+  // iconv_open() signals failure with (iconv_t)-1, never with a null descriptor.
+  iconv_t cd = iconv_open(to_charset,from_charset);
+  if (cd == (iconv_t)-1) return -1;
+
+  memset(outbuf,0,outlen);
+  // iconv() advances inbuf/outbuf and decrements inlen/outlen as it consumes and produces bytes.
+  const size_t rc = iconv(cd,&inbuf,&inlen,&outbuf,&outlen);
+  iconv_close(cd);  // release the descriptor on both the success and the failure path
+  if (rc == (size_t)-1) return -1;
+
+  // Bytes produced = original capacity minus what iconv() left over.
+  return (int)(outCapacity - outlen);
+}
+/* GBK -> UTF-8: thin wrapper that forwards both buffers to code_convert() and returns its result as-is. */
+int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen)
+{
+  return code_convert(/* from */ "gbk", /* to */ "utf-8", inbuf, inlen, outbuf, outlen);
+}
+
+/* Convert gbklen bytes of GBK text at gbkString into UTF-8 in result (outlen bytes offered).
+   Returns the value gbk2utf8() produced, or 0 when gbk2utf8() reports -1.  When addNullAtEnd
+   is positive, a 0 byte is stored at result[<returned value>]. */
+int gbkToUtf8(char* gbkString, size_t gbklen,
+              char* result ,size_t outlen, int addNullAtEnd)
+{
+   const int convertedLen = gbk2utf8(gbkString, gbklen, result, outlen);
+   if (convertedLen == -1)
+      return 0;   // a failed conversion is folded into a zero length
+
+   if (addNullAtEnd > 0) result[convertedLen] = 0;
+   return convertedLen;
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.h
----------------------------------------------------------------------
diff --git a/core/sql/common/csconvert.h b/core/sql/common/csconvert.h
index 56c9603..57fec71 100644
--- a/core/sql/common/csconvert.h
+++ b/core/sql/common/csconvert.h
@@ -106,6 +106,10 @@ int  UTF16ToLocale( const enum cnv_version version,
                     unsigned int * translated_char_cnt_p = NULL  ,
                     const char *substitution_char        = NULL );
 
+NA_EIDPROC
+int gbkToUtf8(char* gbkString, size_t gbklen,
+              char* result ,size_t outlen, int addNullAtEnd=FALSE);
+
 /*
  * LocaleCharToUCS4() converts the FIRST char in the input string to its
  * UCS4 value.  Returns the UCS4 value at location specified AND the

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_clause_derived.h
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_clause_derived.h b/core/sql/exp/exp_clause_derived.h
index fc6ecfe..4558431 100644
--- a/core/sql/exp/exp_clause_derived.h
+++ b/core/sql/exp/exp_clause_derived.h
@@ -1542,7 +1542,9 @@ enum conv_case_index {
   CONV_UTF8_F_UCS2_V                   =248,
 
   CONV_BLOB_BLOB                       =249,
-  CONV_BLOB_ASCII_F                    =250
+  CONV_BLOB_ASCII_F                    =250,
+
+  CONV_GBK_F_UTF8_V                    =251
 };
 
 class SQLEXP_LIB_FUNC  ex_conv_clause : public ex_clause {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_conv.cpp
----------------------------------------------------------------------
diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp
index 8c31efc..015ec0f 100644
--- a/core/sql/exp/exp_conv.cpp
+++ b/core/sql/exp/exp_conv.cpp
@@ -9321,6 +9321,32 @@ convDoIt(char * source,
   };
   break;
 
+// gbk -> utf8
+  case CONV_GBK_F_UTF8_V:
+  {
+    char * targetbuf = new char[sourceLen*4+1];  // scratch: 4 bytes per source byte + 1 spare
+    size_t sl = sourceLen;
+    int convLen = gbkToUtf8( source, sl, targetbuf, sl*4);
+    int copyLen = 0;
+    if (convLen > 0) {
+      // Copy no more than the target can hold; a longer result is cut off at targetLen bytes.
+      copyLen = (convLen< targetLen) ? convLen: targetLen;
+      str_cpy_all(target, targetbuf, copyLen);
+      // NOTE(review): no diagnostic is raised when convLen > targetLen -- confirm that is intended.
+    }
+    else {
+      // LCOV_EXCL_START
+      convLen = 0;
+      copyLen = 0;
+      // LCOV_EXCL_STOP
+    }
+
+    if ( varCharLen )
+       setVCLength(varCharLen, varCharLenSize, copyLen);
+    // Allocated with new[], so release with delete[]; scalar delete here is undefined behavior.
+    delete [] targetbuf;
+
+  };
+  break;
 // 5/10/98: sjis -> unicode
   case CONV_SJIS_F_UNICODE_F: 
   case CONV_SJIS_F_UNICODE_V: 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/generator/GenItemFunc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenItemFunc.cpp b/core/sql/generator/GenItemFunc.cpp
index c7e6748..8c8f7e6 100644
--- a/core/sql/generator/GenItemFunc.cpp
+++ b/core/sql/generator/GenItemFunc.cpp
@@ -1737,6 +1737,9 @@ short Translate::codeGen(Generator * generator)
      case UCS2_TO_UTF8:
 	convType = CONV_UCS2_F_UTF8_V;
 	break;
+     case GBK_TO_UTF8:
+        convType = CONV_GBK_F_UTF8_V;
+        break;
      case UNICODE_TO_ISO88591:
 	convType = CONV_UNICODE_F_ASCII_V;
 	break;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ItemExpr.cpp b/core/sql/optimizer/ItemExpr.cpp
index 41b6b76..7df1a7e 100644
--- a/core/sql/optimizer/ItemExpr.cpp
+++ b/core/sql/optimizer/ItemExpr.cpp
@@ -13408,6 +13408,8 @@ Translate::Translate(ItemExpr *valPtr, NAString* map_table_name)
     map_table_id_ = Translate::SJIS_TO_UTF8;
   else if ( _strcmpi(map_table_name->data(), "UTF8TOSJIS") == 0 )
     map_table_id_ = Translate::UTF8_TO_SJIS;
+  else if ( _strcmpi(map_table_name->data(), "GBKTOUTF8") == 0 )
+    map_table_id_ = Translate::GBK_TO_UTF8;
 
                 else
                   if ( _strcmpi(map_table_name->data(), "KANJITOISO88591") == 0 )

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemFunc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ItemFunc.h b/core/sql/optimizer/ItemFunc.h
index 3bb7238..d794a3d 100644
--- a/core/sql/optimizer/ItemFunc.h
+++ b/core/sql/optimizer/ItemFunc.h
@@ -2112,6 +2112,7 @@ public:
         UTF8_TO_SJIS, SJIS_TO_UTF8, UTF8_TO_ISO88591,
         ISO88591_TO_UTF8,
         KANJI_MP_TO_ISO88591, KSC5601_MP_TO_ISO88591,
+        GBK_TO_UTF8,
         UNKNOWN_TRANSLATION};
 
   Translate(ItemExpr *valPtr, NAString* map_table_name);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/SynthType.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp
index b9d5518..56e341f 100644
--- a/core/sql/optimizer/SynthType.cpp
+++ b/core/sql/optimizer/SynthType.cpp
@@ -5207,6 +5207,18 @@ const NAType *Translate::synthesizeType()
          err4106arg = SQLCHARSETSTRING_UTF8;
        break;
 
+     case GBK_TO_UTF8:
+       if (translateSource->getCharSet() == CharInfo::GBK || translateSource->getCharSet() == CharInfo::UnknownCharSet )
+         charsetTarget = CharInfo::UTF8;
+       else
+       {
+            if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) ==  CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) )
+              err4106arg = SQLCHARSETCODE_GB2312;
+            else
+             charsetTarget = CharInfo::UTF8;
+       }
+       break;
+
      case ISO88591_TO_UTF8:
        if (translateSource->getCharSet() == CharInfo::ISO88591)
        {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index 2778197..d10acc8 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3785,6 +3785,9 @@ enum DefaultConstants
   // set to ON to aggressively allocate ESP per core
   AGGRESSIVE_ESP_ALLOCATION_PER_CORE,
 
+  // real charset in the HIVE table
+  HIVE_FILE_CHARSET,
+
   // This enum constant must be the LAST one in the list; it's a count,
   // not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)!
   __NUM_DEFAULT_ATTRIBUTES

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index f6d5604..cfe2cf3 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1958,6 +1958,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS,		"OFF"),
 
   DDkwd__(HIVE_DEFAULT_CHARSET,            (char *)SQLCHARSETSTRING_UTF8),
   DD_____(HIVE_DEFAULT_SCHEMA,                  "HIVE"),
+  DD_____(HIVE_FILE_CHARSET,            (char *)SQLCHARSETSTRING_UTF8),
   DD_____(HIVE_FILE_NAME,     "/hive/tpcds/customer/customer.dat" ),
   DD_____(HIVE_HDFS_STATS_LOG_FILE,             ""),
   DDint__(HIVE_LIB_HDFS_PORT_OVERRIDE,          "-1"),
@@ -6390,6 +6391,7 @@ DefaultToken NADefaults::token(Int32 attrEnum,
   else {
     if ((attrEnum == TERMINAL_CHARSET) ||
         (attrEnum == USE_HIVE_SOURCE) ||
+        (attrEnum == HIVE_FILE_CHARSET) ||
         (attrEnum == HBASE_DATA_BLOCK_ENCODING_OPTION) ||
         (attrEnum == HBASE_COMPRESSION_OPTION))
       return DF_USER;


Mime
View raw message