trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From se...@apache.org
Subject incubator-trafodion git commit: Merge [TRAFODION-762] PR-496 support HIVE data type VARCHAR and DATE
Date Mon, 06 Jun 2016 20:30:57 GMT
Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 79fad0ed8 -> 4f1089288


Merge [TRAFODION-762] PR-496 support HIVE data type VARCHAR and DATE

Squashed commit of the following:

commit 46c49e0c148c0a6b9394f059a04d98ea5f13b3aa
Author: Liu Ming <ovis_poly@sina.com>
Date:   Sat Jun 4 03:47:09 2016 +0000

    [TRAFODION-762] add data type varchar and date for hive scan

commit c6e4654a2d67ce1f8f1f7ae758fbeda90b51b85e
Author: Liu Ming <ovis_poly@sina.com>
Date:   Fri Jun 3 00:33:06 2016 +0000

    [TRAFODION-762] add data type varchar and date for hive scan

commit d5590d61b92dbb2351b2320b3c86e2287e114f87
Author: Liu Ming <ovis_poly@sina.com>
Date:   Fri May 20 10:21:59 2016 +0000

    [TRAFODION-762] support HIVE data type VARCHAR and DATE

commit c04c48a047cb88472c4b161195d29763d35ab5eb
Author: Liu Ming <ovis_poly@sina.com>
Date:   Fri May 20 05:15:59 2016 +0000

    [TRAFODION-762] support HIVE data type VARCHAR and DATE


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/4f108928
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/4f108928
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/4f108928

Branch: refs/heads/master
Commit: 4f1089288011385604cf72055b624e245aa0a4d1
Parents: 79fad0e
Author: selvaganesang <selva@apache.org>
Authored: Mon Jun 6 20:28:26 2016 +0000
Committer: selvaganesang <selva@apache.org>
Committed: Mon Jun 6 20:28:26 2016 +0000

----------------------------------------------------------------------
 core/sql/executor/ExExeUtilGet.cpp       | 73 +++++++++++++++++++++++++--
 core/sql/optimizer/NATable.cpp           | 70 ++++++++++++++++++++++++-
 core/sql/regress/hive/EXPECTED005        | 12 ++---
 core/sql/regress/hive/TEST005_a.hive.sql |  8 ++-
 core/sql/regress/hive/tbl_type.data      |  2 +-
 core/sql/sqlcomp/DefaultConstants.h      |  1 +
 core/sql/sqlcomp/nadefaults.cpp          |  4 ++
 7 files changed, 156 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4f108928/core/sql/executor/ExExeUtilGet.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExExeUtilGet.cpp b/core/sql/executor/ExExeUtilGet.cpp
index b88bfd8..7af9c32 100644
--- a/core/sql/executor/ExExeUtilGet.cpp
+++ b/core/sql/executor/ExExeUtilGet.cpp
@@ -4920,6 +4920,12 @@ Lng32 ExExeUtilHiveMDaccessTcb::getFSTypeFromHiveColType(const char* hiveType)
   if ( !strcmp(hiveType, "timestamp"))
     return REC_DATETIME;
 
+  if ( !strcmp(hiveType, "date"))
+    return REC_DATETIME;
+
+  if ( !strncmp(hiveType, "varchar",7) )
+    return REC_BYTE_V_ASCII;
+
   return -1;
 }
 
@@ -4939,8 +4945,22 @@ Lng32 ExExeUtilHiveMDaccessTcb::getLengthFromHiveColType(const char* hiveType)
 
   if ( !strcmp(hiveType, "string")) {
     char maxStrLen[100];
+    char maxStrLenInBytes[100];
     cliInterface()->getCQDval("HIVE_MAX_STRING_LENGTH", maxStrLen);
-    return atoi(maxStrLen); // TBD: add cqd.
+    cliInterface()->getCQDval("HIVE_MAX_STRING_LENGTH_IN_BYTES", maxStrLenInBytes);
+    //Hive varchar(n) contains n character instead of n bytes
+    //so trafodion map hive varchar(n) into Trafodion varchar(n)
+    //but hive string will map to Trafodion varchar(n BYTES)
+    //So this CQD will be confusing
+    //We change the CQD name to explicitly indicate it is lenght in bytes
+    //For backward compatibility, HIVE_MAX_STRING_LENGTH still remains now, but is deprecated, user can still use it
+    //But HIVE_MAX_STRING_LENGTH_IN_BYTES will overwrite HIVE_MAX_STRING_LENGTH if changed
+    Int32 hiveMaxLenInBytes = atoi(maxStrLenInBytes);
+    Int32 hiveMaxLen = atoi(maxStrLen);
+    if( hiveMaxLenInBytes != 32000 ) //HIVE_MAX_STRING_LENGTH_IN_BYTES changed
+      return hiveMaxLenInBytes;
+    else
+      return hiveMaxLen;  
   }
 
   if ( !strcmp(hiveType, "float"))
@@ -4952,6 +4972,42 @@ Lng32 ExExeUtilHiveMDaccessTcb::getLengthFromHiveColType(const char* hiveType)
   if ( !strcmp(hiveType, "timestamp"))
     return 26; //Is this internal or display length? REC_DATETIME;
 
+  if ( !strcmp(hiveType, "date"))
+    return 10; //Is this internal or display length? REC_DATETIME;
+  
+  if ( !strncmp(hiveType, "varchar",7) )
+  {
+    //try to get the length
+    char maxLen[32];
+    memset(maxLen, 0, 32);
+    Int32 i=0,j=0;
+    Int16 copyit = 0;
+    Int32 hiveTypeLen = strlen(hiveType);
+
+    if( hiveTypeLen  > 39)  return -1;  
+ 
+    for(i = 0; i < hiveTypeLen ; i++)
+    {
+      if(hiveType[i] == '(')  
+      {
+        copyit=1;
+        continue;
+      }
+      else if(hiveType[i] == ')')  
+        break;
+      if(copyit == 1 )
+      {
+        maxLen[j] = hiveType[i];
+        j++;
+      }
+    }
+
+    Int32 len = atoi(maxLen);
+
+    if (len == 0) return -1;
+    else
+      return len;
+  }
   return -1;
 }
 
@@ -5183,14 +5239,23 @@ short ExExeUtilHiveMDaccessTcb::work()
 	    str_pad(infoCol->dtQualifier, 28, ' ');
 
 	    if (infoCol->fsDatatype == REC_DATETIME)
-	      {
-		// hive currently only supports timestamp
+	    {
+              if(infoCol->colSize > 10) {
 		infoCol->dtCode = SQLDTCODE_TIMESTAMP;
 		infoCol->colScale = 6;
 		str_cpy(infoCol->dtQualifier, "(6)", 28, ' ');
 		infoCol->dtStartField = 1;
 		infoCol->dtEndField = 6;
-	      }
+              }
+              else
+              {
+		infoCol->dtCode = SQLDTCODE_DATE;
+		infoCol->colScale = 0;
+	        str_pad(infoCol->dtQualifier, 28, ' ');
+		infoCol->dtStartField = 1;
+		infoCol->dtEndField = 6;
+              }
+	    }
 
 	    // no default value
 	    str_cpy(infoCol->defVal, " ", 240, ' ');

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4f108928/core/sql/optimizer/NATable.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NATable.cpp b/core/sql/optimizer/NATable.cpp
index 66d18b6..7150963 100644
--- a/core/sql/optimizer/NATable.cpp
+++ b/core/sql/optimizer/NATable.cpp
@@ -3533,9 +3533,16 @@ NAType* getSQColTypeForHive(const char* hiveType, NAMemory* heap)
   if ( !strcmp(hiveType, "string"))
     {
       Int32 len = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH);
+      Int32 lenInBytes = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH_IN_BYTES);
+      if( lenInBytes != 32000 ) 
+        len = lenInBytes;
       NAString hiveCharset =
         ActiveSchemaDB()->getDefaults().getValue(HIVE_DEFAULT_CHARSET);
-      return new (heap) SQLVarChar(CharLenInfo((hiveCharset == CharInfo::UTF8 ? 0 : len),len),
+      hiveCharset.toUpper();
+      CharInfo::CharSet hiveCharsetEnum = CharInfo::getCharSetEnum(hiveCharset);
+      Int32 maxNumChars = 0;
+      Int32 storageLen = len;
+      return new (heap) SQLVarChar(CharLenInfo(maxNumChars, storageLen),
                                    TRUE, // allow NULL
                                    FALSE, // not upshifted
                                    FALSE, // not case-insensitive
@@ -3553,6 +3560,60 @@ NAType* getSQColTypeForHive(const char* hiveType, NAMemory* heap)
   if ( !strcmp(hiveType, "timestamp"))
     return new (heap) SQLTimestamp(TRUE /* allow NULL */ , 6, heap);
 
+  if ( !strcmp(hiveType, "date"))
+    return new (heap) SQLDate(TRUE /* allow NULL */ , heap);
+
+  if ( !strncmp(hiveType, "varchar", 7) )
+  {
+    char maxLen[32];
+    memset(maxLen, 0, 32);
+    int i=0,j=0;
+    int copyit = 0;
+
+    //get length
+    for(i = 0; i < strlen(hiveType) ; i++)
+    {
+      if(hiveType[i] == '(') //start
+      {
+        copyit=1;
+        continue;
+      }
+      else if(hiveType[i] == ')') //stop
+        break; 
+      if(copyit > 0)
+      {
+        maxLen[j] = hiveType[i];
+        j++;
+      }
+    }
+    Int32 len = atoi(maxLen);
+
+    if(len == 0) return NULL;  //cannot parse correctly
+
+    NAString hiveCharset =
+        ActiveSchemaDB()->getDefaults().getValue(HIVE_DEFAULT_CHARSET);
+
+    hiveCharset.toUpper();
+    CharInfo::CharSet hiveCharsetEnum = CharInfo::getCharSetEnum(hiveCharset);
+    Int32 maxNumChars = 0;
+    Int32 storageLen = len;
+    if (CharInfo::isVariableWidthMultiByteCharSet(hiveCharsetEnum))
+    {
+      // For Hive VARCHARs, the number specified is the max. number of characters,
+      // while we count in bytes when using HIVE_MAX_STRING_LENGTH for Hive STRING
+      // columns. Set the max character constraint and also adjust the required storage length.
+       maxNumChars = len;
+       storageLen = len * CharInfo::maxBytesPerChar(hiveCharsetEnum);
+    }
+    return new (heap) SQLVarChar(CharLenInfo(maxNumChars, storageLen),
+                                   TRUE, // allow NULL
+                                   FALSE, // not upshifted
+                                   FALSE, // not case-insensitive
+                                   CharInfo::getCharSetEnum(hiveCharset),
+                                   CharInfo::DefaultCollation,
+                                   CharInfo::IMPLICIT);
+  } 
+
   return NULL;
 }
 
@@ -5840,6 +5901,9 @@ NATable::NATable(BindWA *bindWA,
     tableConstructionHadWarnings_=TRUE;
 
   hiveDefaultStringLen_ = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH);
+  Int32 hiveDefaultStringLenInBytes = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH_IN_BYTES);
+  if( hiveDefaultStringLenInBytes != 32000 ) 
+      hiveDefaultStringLen_ = hiveDefaultStringLenInBytes;
 
   if (!(corrName.isSeabaseMD() || corrName.isSpecialTable()))
     setupPrivInfo();
@@ -7362,6 +7426,10 @@ NATable * NATableDB::get(const ExtendedQualName* key, BindWA* bindWA, NABoolean
               (Int64) CmpCommon::getDefaultLong(HIVE_METADATA_REFRESH_INTERVAL);
             Int32 defaultStringLen = 
               CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH);
+            Int32 defaultStringLenInBytes = 
+              CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH_IN_BYTES);
+            if(defaultStringLenInBytes != 32000)
+              defaultStringLen = defaultStringLenInBytes;
             Int64 expirationTimestamp = refreshInterval;
             NAString defSchema =
               ActiveSchemaDB()->getDefaults().getValue(HIVE_DEFAULT_SCHEMA);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4f108928/core/sql/regress/hive/EXPECTED005
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/EXPECTED005 b/core/sql/regress/hive/EXPECTED005
index 6e65f1a..0c505e3 100644
--- a/core/sql/regress/hive/EXPECTED005
+++ b/core/sql/regress/hive/EXPECTED005
@@ -433,10 +433,10 @@ ID           CHAPTER                    ENGLISH                    TRANSLATOR
 >>
 >>select * from tbl_type;
 
-TINT    SM      I            BIG                   STR                        F                D                          T
-------  ------  -----------  --------------------  -------------------------  ---------------  -------------------------  --------------------------
+TINT    SM      I            BIG                   STR                        F                D                          T                           DT          VC
+------  ------  -----------  --------------------  -------------------------  ---------------  -------------------------  --------------------------  ----------  ----------
 
-   201     202          203                   204  two hundred                 2.0000000E+002   2.00000000000000000E+002  2022-02-02 22:22:22.222222
+   201     202          203                   204  two hundred                 2.0000000E+002   2.00000000000000000E+002  2022-02-02 22:22:22.222222  2022-02-02  varchar
 
 --- 1 row(s) selected.
 >>insert into tbl_type_temp select * from tbl_type;
@@ -444,10 +444,10 @@ TINT    SM      I            BIG                   STR                        F
 --- 1 row(s) inserted.
 >>select * from tbl_type_temp;
 
-TINT    SM      I            BIG                   STR                        F                D                          T
-------  ------  -----------  --------------------  -------------------------  ---------------  -------------------------  --------------------------
+TINT    SM      I            BIG                   STR                        F                D                          T                           DT          VC
+------  ------  -----------  --------------------  -------------------------  ---------------  -------------------------  --------------------------  ----------  ----------
 
-   201     202          203                   204  two hundred                 2.0000000E+002   2.00000000000000000E+002  2022-02-02 22:22:22.222222
+   201     202          203                   204  two hundred                 2.0000000E+002   2.00000000000000000E+002  2022-02-02 22:22:22.222222  2022-02-02  varchar
 
 --- 1 row(s) selected.
 >>

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4f108928/core/sql/regress/hive/TEST005_a.hive.sql
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/TEST005_a.hive.sql b/core/sql/regress/hive/TEST005_a.hive.sql
index 20ecd31..ebdbdff 100644
--- a/core/sql/regress/hive/TEST005_a.hive.sql
+++ b/core/sql/regress/hive/TEST005_a.hive.sql
@@ -136,7 +136,9 @@ create external table tbl_type
      str         string,
      f           float,
      d           double,
-     t           timestamp
+     t           timestamp,
+     dt          date,
+     vc          varchar(10)
 )
 row format delimited fields terminated by '|'
 location '/user/hive/exttables/tbl_type';
@@ -151,7 +153,9 @@ create table tbl_type_temp
      str         string,
      f           float,
      d           double,
-     t           timestamp
+     t           timestamp,
+     dt          date,
+     vc          varchar(10)
 )
 row format delimited fields terminated by '|';
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4f108928/core/sql/regress/hive/tbl_type.data
----------------------------------------------------------------------
diff --git a/core/sql/regress/hive/tbl_type.data b/core/sql/regress/hive/tbl_type.data
index bfa6257..c780fb3 100644
--- a/core/sql/regress/hive/tbl_type.data
+++ b/core/sql/regress/hive/tbl_type.data
@@ -1 +1 @@
-201|202|203|204|two hundred|2E2|2E+002|2022-02-02 22:22:22.222222
+201|202|203|204|two hundred|2E2|2E+002|2022-02-02 22:22:22.222222|2022-02-02|varchar

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4f108928/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index 5606f13..aa90c7c 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3355,6 +3355,7 @@ enum DefaultConstants
   COLLECT_REORG_STATS,
   
   HIVE_MAX_STRING_LENGTH,
+  HIVE_MAX_STRING_LENGTH_IN_BYTES,
   HIVE_USE_FAKE_TABLE_DESC,
   HIVE_LIB_HDFS_PORT_OVERRIDE,
   HIVE_HDFS_STATS_LOG_FILE,

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/4f108928/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index 415670c..9ecfd29 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1951,6 +1951,9 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS,		"OFF"),
  // Main ones to use:
  // HIVE_MAX_STRING_LENGTH: Hive "string" data type gets converted
  //                         into a VARCHAR with this length
+ //                         This should be deprecated from Trafodion R2.1
+ // HIVE_MAX_STRING_LENGTH_IN_BYTES: Hive "string" data type gets converted
+ //                                  into a VARCHAR with this length
  // HIVE_MIN_BYTES_PER_ESP_PARTITION: Make one ESP for this many bytes
  // HIVE_NUM_ESPS_PER_DATANODE: Equivalent of MAX_ESPS_PER_CPU_PER_OP
  //                             Note that this is really per SeaQuest node
@@ -1968,6 +1971,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS,		"OFF"),
   DDint__(HIVE_LOCALITY_BALANCE_LEVEL,          "0"),
   DDui___(HIVE_MAX_ESPS,                        "9999"),
   DDui___(HIVE_MAX_STRING_LENGTH,               "32000"),
+  DDui___(HIVE_MAX_STRING_LENGTH_IN_BYTES,      "32000"),
   DDkwd__(HIVE_METADATA_JAVA_ACCESS,            "ON"),
   DDint__(HIVE_METADATA_REFRESH_INTERVAL,       "0"),
   DDflt0_(HIVE_MIN_BYTES_PER_ESP_PARTITION,     "67108864"),


Mime
View raw message