hive-commits mailing list archives

From: sze...@apache.org
Subject: svn commit: r1673583 [3/27] - in /hive/branches/spark: ./ beeline/src/java/org/apache/hive/beeline/ bin/ cli/src/java/org/apache/hadoop/hive/cli/ cli/src/test/org/apache/hadoop/hive/cli/ common/ common/src/java/org/apache/hadoop/hive/common/ common/src...
Date: Tue, 14 Apr 2015 23:36:09 GMT
Modified: hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (original)
+++ hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java Tue Apr 14 23:36:02 2015
@@ -266,7 +266,7 @@ public class TestJdbcWithMiniHS2 {
 
     // Set some conf parameters
     String hiveConf = "hive.cli.print.header=true;hive.server2.async.exec.shutdown.timeout=20;"
-        + "hive.server2.async.exec.threads=30;hive.server2.thrift.http.max.worker.threads=15";
+        + "hive.server2.async.exec.threads=30;hive.server2.thrift.max.worker.threads=15";
     // Set some conf vars
     String hiveVar = "stab=salesTable;icol=customerID";
     String jdbcUri = miniHS2.getJdbcURL() + "?" + hiveConf + "#" + hiveVar;
@@ -284,7 +284,7 @@ public class TestJdbcWithMiniHS2 {
     verifyConfProperty(stmt, "hive.cli.print.header", "true");
     verifyConfProperty(stmt, "hive.server2.async.exec.shutdown.timeout", "20");
     verifyConfProperty(stmt, "hive.server2.async.exec.threads", "30");
-    verifyConfProperty(stmt, "hive.server2.thrift.http.max.worker.threads",
+    verifyConfProperty(stmt, "hive.server2.thrift.max.worker.threads",
         "15");
     verifyConfProperty(stmt, "stab", "salesTable");
     verifyConfProperty(stmt, "icol", "customerID");

Modified: hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java (original)
+++ hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java Tue Apr 14 23:36:02 2015
@@ -155,7 +155,7 @@ public class TestSSL {
         cause = cause.getCause();
       }
       Assert.assertEquals("org.apache.http.NoHttpResponseException", cause.getClass().getName());
-      Assert.assertEquals("The target server failed to respond", cause.getMessage());
+      Assert.assertTrue(cause.getMessage().contains("failed to respond"));
     }
     miniHS2.stop();
   }

Modified: hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/service/cli/thrift/TestThriftHttpCLIService.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/service/cli/thrift/TestThriftHttpCLIService.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/service/cli/thrift/TestThriftHttpCLIService.java (original)
+++ hive/branches/spark/itests/hive-unit/src/test/java/org/apache/hive/service/cli/thrift/TestThriftHttpCLIService.java Tue Apr 14 23:36:02 2015
@@ -160,7 +160,7 @@ public class TestThriftHttpCLIService ex
     String httpUrl = transportMode + "://" + host + ":" + port +
         "/" + thriftHttpPath + "/";
     httpClient.addRequestInterceptor(
-        new HttpBasicAuthInterceptor(USERNAME, PASSWORD));
+        new HttpBasicAuthInterceptor(USERNAME, PASSWORD, null, null));
     return new THttpClient(httpUrl, httpClient);
   }
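
The interceptor's constructor now takes a CookieStore and a cookie name in addition to the
credentials; passing (null, null), as the test does above, keeps the old behaviour of
authenticating every request. A hedged sketch of the cookie-enabled variant (URL, credentials
and cookie name are placeholders):

    import org.apache.hive.jdbc.HttpBasicAuthInterceptor;
    import org.apache.http.client.CookieStore;
    import org.apache.http.impl.client.BasicCookieStore;
    import org.apache.http.impl.client.DefaultHttpClient;
    import org.apache.thrift.transport.THttpClient;

    public class CookieAwareTransportSketch {
      // Illustrative helper, not part of the patch.
      static THttpClient createTransport(String httpUrl) throws Exception {
        DefaultHttpClient httpClient = new DefaultHttpClient();
        CookieStore cookieStore = new BasicCookieStore();
        // A non-null cookie store switches the interceptor to its cookie-aware path, so the
        // basic-auth header is attached on the first request and on 401-triggered retries
        // instead of on every call.
        httpClient.addRequestInterceptor(
            new HttpBasicAuthInterceptor("anonymous", "anonymous", cookieStore, "hive.server2.auth"));
        return new THttpClient(httpUrl, httpClient);
      }
    }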
 

Modified: hive/branches/spark/itests/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/pom.xml?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/itests/pom.xml (original)
+++ hive/branches/spark/itests/pom.xml Tue Apr 14 23:36:02 2015
@@ -93,6 +93,9 @@
                   mkdir -p $DOWNLOAD_DIR
                   download "http://d3jw87u4immizc.cloudfront.net/spark-tarball/spark-${spark.version}-bin-hadoop2-without-hive.tgz" "spark"
                   cp -f $HIVE_ROOT/data/conf/spark/log4j.properties $BASE_DIR/spark/conf/
+                  sed '/package /d' ${basedir}/${hive.path.to.root}/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java > /tmp/UDFExampleAdd.java
+                  javac -cp  ${settings.localRepository}/org/apache/hive/hive-exec/${project.version}/hive-exec-${project.version}.jar /tmp/UDFExampleAdd.java -d /tmp
+                  jar -cf /tmp/udfexampleadd-1.0.jar -C /tmp UDFExampleAdd.class
                 </echo>
               </target>
             </configuration>

Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Tue Apr 14 23:36:02 2015
@@ -29,6 +29,7 @@ minimr.query.files=auto_sortmerge_join_1
   list_bucket_dml_10.q,\
   load_fs2.q,\
   load_hdfs_file_with_space_in_the_name.q,\
+  non_native_window_udf.q, \
   optrstat_groupby.q,\
   parallel_orderby.q,\
   ql_rewrite_gbtoidx.q,\
@@ -180,9 +181,11 @@ minitez.query.files.shared=alter_merge_2
   update_where_non_partitioned.q,\
   update_where_partitioned.q,\
   update_two_cols.q,\
+  vector_aggregate_9.q,\
   vector_between_in.q,\
   vector_bucket.q,\
   vector_cast_constant.q,\
+  vector_char_2.q,\
   vector_char_4.q,\
   vector_char_mapjoin1.q,\
   vector_char_simple.q,\
@@ -190,6 +193,7 @@ minitez.query.files.shared=alter_merge_2
   vector_coalesce_2.q,\
   vector_count_distinct.q,\
   vector_data_types.q,\
+  vector_date_1.q,\
   vector_decimal_1.q,\
   vector_decimal_10_0.q,\
   vector_decimal_2.q,\
@@ -203,6 +207,8 @@ minitez.query.files.shared=alter_merge_2
   vector_decimal_mapjoin.q,\
   vector_decimal_math_funcs.q,\
   vector_decimal_precision.q,\
+  vector_decimal_round.q,\
+  vector_decimal_round_2.q,\
   vector_decimal_trailing.q,\
   vector_decimal_udf.q,\
   vector_decimal_udf2.q,\
@@ -210,8 +216,12 @@ minitez.query.files.shared=alter_merge_2
   vector_elt.q,\
   vector_groupby_3.q,\
   vector_groupby_reduce.q,\
+  vector_if_expr.q,\
+  vector_interval_1.q,\
+  vector_interval_2.q,\
   vector_left_outer_join.q,\
   vector_mapjoin_reduce.q,\
+  vector_multi_insert.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
   vector_partition_diff_num_cols.q,\
@@ -287,6 +297,8 @@ minitez.query.files=bucket_map_join_tez1
   bucket_map_join_tez2.q,\
   dynamic_partition_pruning.q,\
   dynamic_partition_pruning_2.q,\
+  explainuser_1.q,\
+  explainuser_2.q,\
   hybridhashjoin.q,\
   mapjoin_decimal.q,\
   lvj_mapjoin.q, \

Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java (original)
+++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveBaseResultSet.java Tue Apr 14 23:36:02 2015
@@ -44,6 +44,8 @@ import java.util.Calendar;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hive.service.cli.TableSchema;
 import org.apache.hive.service.cli.Type;
 
@@ -443,6 +445,10 @@ public abstract class HiveBaseResultSet
         return new BigDecimal((String)value);
       case DATE_TYPE:
         return Date.valueOf((String) value);
+      case INTERVAL_YEAR_MONTH_TYPE:
+        return HiveIntervalYearMonth.valueOf((String) value);
+      case INTERVAL_DAY_TIME_TYPE:
+        return HiveIntervalDayTime.valueOf((String) value);
       case ARRAY_TYPE:
       case MAP_TYPE:
       case STRUCT_TYPE:
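
With the two new cases, ResultSet.getObject() on an interval column parses the server's string
representation into the corresponding Hive interval class. A small sketch of the client-side
effect (connection string and query are illustrative):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;
    import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;

    public class IntervalGetObjectSketch {
      public static void main(String[] args) throws Exception {
        try (Connection con = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
             Statement stmt = con.createStatement();
             // The interval literal produces an INTERVAL_YEAR_MONTH_TYPE column.
             ResultSet rs = stmt.executeQuery("select interval '2-6' year to month")) {
          if (rs.next()) {
            HiveIntervalYearMonth iv = (HiveIntervalYearMonth) rs.getObject(1);
            System.out.println(iv); // parsed via HiveIntervalYearMonth.valueOf("2-6")
          }
        }
      }
    }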

Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java (original)
+++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveConnection.java Tue Apr 14 23:36:02 2015
@@ -50,10 +50,11 @@ import java.util.concurrent.TimeUnit;
 import javax.security.sasl.Sasl;
 import javax.security.sasl.SaslException;
 
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.protocol.HttpContext;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hive.jdbc.Utils.JdbcConnectionParams;
 import org.apache.hive.service.auth.HiveAuthFactory;
 import org.apache.hive.service.auth.KerberosSaslHelper;
@@ -73,9 +74,17 @@ import org.apache.hive.service.cli.thrif
 import org.apache.hive.service.cli.thrift.TRenewDelegationTokenResp;
 import org.apache.hive.service.cli.thrift.TSessionHandle;
 import org.apache.http.HttpRequestInterceptor;
-import org.apache.http.conn.scheme.Scheme;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.CookieStore;
+import org.apache.http.client.ServiceUnavailableRetryStrategy;
+import org.apache.http.config.Registry;
+import org.apache.http.config.RegistryBuilder;
+import org.apache.http.conn.socket.ConnectionSocketFactory;
 import org.apache.http.conn.ssl.SSLSocketFactory;
-import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.client.BasicCookieStore;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.impl.conn.BasicHttpClientConnectionManager;
 import org.apache.thrift.TException;
 import org.apache.thrift.protocol.TBinaryProtocol;
 import org.apache.thrift.transport.THttpClient;
@@ -173,6 +182,7 @@ public class HiveConnection implements j
     supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V5);
     supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6);
     supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V7);
+    supportedProtocols.add(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8);
 
     // open client session
     openSession();
@@ -235,7 +245,7 @@ public class HiveConnection implements j
   }
 
   private TTransport createHttpTransport() throws SQLException, TTransportException {
-    DefaultHttpClient httpClient;
+    CloseableHttpClient httpClient;
     boolean useSsl = isSslConnection();
     // Create an http client from the configs
     httpClient = getHttpClient(useSsl);
@@ -259,35 +269,76 @@ public class HiveConnection implements j
     return transport;
   }
 
-  private DefaultHttpClient getHttpClient(Boolean useSsl) throws SQLException {
-    DefaultHttpClient httpClient = new DefaultHttpClient();
+  private CloseableHttpClient getHttpClient(Boolean useSsl) throws SQLException {
+    boolean isCookieEnabled = sessConfMap.get(JdbcConnectionParams.COOKIE_AUTH) == null ||
+      (!JdbcConnectionParams.COOKIE_AUTH_FALSE.equalsIgnoreCase(
+      sessConfMap.get(JdbcConnectionParams.COOKIE_AUTH)));
+    String cookieName = sessConfMap.get(JdbcConnectionParams.COOKIE_NAME) == null ?
+      JdbcConnectionParams.DEFAULT_COOKIE_NAMES_HS2 :
+      sessConfMap.get(JdbcConnectionParams.COOKIE_NAME);
+    CookieStore cookieStore = isCookieEnabled ? new BasicCookieStore() : null;
+    HttpClientBuilder httpClientBuilder;
     // Request interceptor for any request pre-processing logic
     HttpRequestInterceptor requestInterceptor;
-    // If Kerberos
+
+    // Configure http client for kerberos/password based authentication
     if (isKerberosAuthMode()) {
       /**
        * Add an interceptor which sets the appropriate header in the request.
        * It does the kerberos authentication and get the final service ticket,
        * for sending to the server before every request.
        * In https mode, the entire information is encrypted
-       * TODO: Optimize this with a mix of kerberos + using cookie.
        */
       requestInterceptor =
           new HttpKerberosRequestInterceptor(sessConfMap.get(JdbcConnectionParams.AUTH_PRINCIPAL),
-              host, getServerHttpUrl(useSsl), assumeSubject);
+              host, getServerHttpUrl(useSsl), assumeSubject, cookieStore, cookieName);
     }
     else {
       /**
        * Add an interceptor to pass username/password in the header.
        * In https mode, the entire information is encrypted
        */
-      requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword());
+      requestInterceptor = new HttpBasicAuthInterceptor(getUserName(), getPassword(),
+                                                        cookieStore, cookieName);
+    }
+    // Configure http client for cookie based authentication
+    if (isCookieEnabled) {
+      // Create a http client with a retry mechanism when the server returns a status code of 401.
+      httpClientBuilder =
+      HttpClients.custom().setServiceUnavailableRetryStrategy(
+        new  ServiceUnavailableRetryStrategy() {
+
+      @Override
+      public boolean retryRequest(
+        final HttpResponse response,
+        final int executionCount,
+        final HttpContext context) {
+        int statusCode = response.getStatusLine().getStatusCode();
+        boolean ret = statusCode == 401 && executionCount <= 1;
+
+        // Set the context attribute to true which will be interpreted by the request interceptor
+        if (ret) {
+          context.setAttribute(Utils.HIVE_SERVER2_RETRY_KEY, Utils.HIVE_SERVER2_RETRY_TRUE);
+        }
+        return ret;
+      }
+
+      @Override
+      public long getRetryInterval() {
+        // Immediate retry
+        return 0;
+      }
+    });
+    } else {
+      httpClientBuilder = HttpClientBuilder.create();
     }
-    // Configure httpClient for SSL
+    // Add the request interceptor to the client builder
+    httpClientBuilder.addInterceptorFirst(requestInterceptor);
+    // Configure http client for SSL
     if (useSsl) {
       String sslTrustStorePath = sessConfMap.get(JdbcConnectionParams.SSL_TRUST_STORE);
       String sslTrustStorePassword = sessConfMap.get(
-          JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD);
+        JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD);
       KeyStore sslTrustStore;
       SSLSocketFactory socketFactory;
       /**
@@ -311,21 +362,25 @@ public class HiveConnection implements j
           // Pick trust store config from the given path
           sslTrustStore = KeyStore.getInstance(JdbcConnectionParams.SSL_TRUST_STORE_TYPE);
           sslTrustStore.load(new FileInputStream(sslTrustStorePath),
-              sslTrustStorePassword.toCharArray());
+            sslTrustStorePassword.toCharArray());
           socketFactory = new SSLSocketFactory(sslTrustStore);
         }
         socketFactory.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
-        Scheme sslScheme = new Scheme("https", 443, socketFactory);
-        httpClient.getConnectionManager().getSchemeRegistry().register(sslScheme);
+
+        final Registry<ConnectionSocketFactory> registry =
+          RegistryBuilder.<ConnectionSocketFactory>create()
+          .register("https", socketFactory)
+          .build();
+
+        httpClientBuilder.setConnectionManager(new BasicHttpClientConnectionManager(registry));
       }
       catch (Exception e) {
         String msg =  "Could not create an https connection to " +
-            jdbcUriString + ". " + e.getMessage();
+          jdbcUriString + ". " + e.getMessage();
         throw new SQLException(msg, " 08S01", e);
       }
     }
-    httpClient.addRequestInterceptor(requestInterceptor);
-    return httpClient;
+    return httpClientBuilder.build();
   }
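
Cookie handling in http transport mode is on by default and is controlled from the JDBC URL
through the new cookieAuth and cookieName session parameters (the default cookie name is
hive.server2.auth). A sketch of the two knobs; host, port and httpPath values are illustrative:

    public class CookieAuthUrlSketch {
      public static void main(String[] args) {
        // Keep cookies but name the cookie explicitly (hive.server2.auth is the default):
        String cookieUrl = "jdbc:hive2://localhost:10001/default;transportMode=http;"
            + "httpPath=cliservice;cookieName=hive.server2.auth";
        // Disable cookie handling entirely, falling back to per-request authentication:
        String noCookieUrl = "jdbc:hive2://localhost:10001/default;transportMode=http;"
            + "httpPath=cliservice;cookieAuth=false";
        System.out.println(cookieUrl + "\n" + noCookieUrl);
      }
    }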
 
   /**

Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveResultSetMetaData.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveResultSetMetaData.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveResultSetMetaData.java (original)
+++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HiveResultSetMetaData.java Tue Apr 14 23:36:02 2015
@@ -21,6 +21,7 @@ package org.apache.hive.jdbc;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
 import java.util.List;
+import org.apache.hive.service.cli.Type;
 
 /**
  * HiveResultSetMetaData.
@@ -43,9 +44,13 @@ public class HiveResultSetMetaData imple
     throw new SQLException("Method not supported");
   }
 
+  private Type getHiveType(int column) throws SQLException {
+    return JdbcColumn.typeStringToHiveType(columnTypes.get(toZeroIndex(column)));
+  }
+
   public String getColumnClassName(int column) throws SQLException {
-    int columnType = getColumnType(column);
-    return JdbcColumn.columnClassName(columnType, columnAttributes.get(toZeroIndex(column)));
+    return JdbcColumn.columnClassName(getHiveType(column),
+        columnAttributes.get(toZeroIndex(column)));
   }
 
   public int getColumnCount() throws SQLException {
@@ -53,9 +58,8 @@ public class HiveResultSetMetaData imple
   }
 
   public int getColumnDisplaySize(int column) throws SQLException {
-    int columnType = getColumnType(column);
-
-    return JdbcColumn.columnDisplaySize(columnType, columnAttributes.get(toZeroIndex(column)));
+    return JdbcColumn.columnDisplaySize(getHiveType(column),
+        columnAttributes.get(toZeroIndex(column)));
   }
 
   public String getColumnLabel(int column) throws SQLException {
@@ -79,15 +83,13 @@ public class HiveResultSetMetaData imple
   }
 
   public int getPrecision(int column) throws SQLException {
-    int columnType = getColumnType(column);
-
-    return JdbcColumn.columnPrecision(columnType, columnAttributes.get(toZeroIndex(column)));
+    return JdbcColumn.columnPrecision(getHiveType(column),
+        columnAttributes.get(toZeroIndex(column)));
   }
 
   public int getScale(int column) throws SQLException {
-    int columnType = getColumnType(column);
-
-    return JdbcColumn.columnScale(columnType, columnAttributes.get(toZeroIndex(column)));
+    return JdbcColumn.columnScale(getHiveType(column),
+        columnAttributes.get(toZeroIndex(column)));
   }
 
   public String getSchemaName(int column) throws SQLException {

Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpBasicAuthInterceptor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpBasicAuthInterceptor.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpBasicAuthInterceptor.java (original)
+++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpBasicAuthInterceptor.java Tue Apr 14 23:36:02 2015
@@ -25,6 +25,8 @@ import org.apache.http.HttpException;
 import org.apache.http.HttpRequest;
 import org.apache.http.HttpRequestInterceptor;
 import org.apache.http.auth.UsernamePasswordCredentials;
+import org.apache.http.client.CookieStore;
+import org.apache.http.client.protocol.ClientContext;
 import org.apache.http.impl.auth.AuthSchemeBase;
 import org.apache.http.impl.auth.BasicScheme;
 import org.apache.http.protocol.HttpContext;
@@ -37,20 +39,42 @@ import org.apache.http.protocol.HttpCont
 public class HttpBasicAuthInterceptor implements HttpRequestInterceptor {
   UsernamePasswordCredentials credentials;
   AuthSchemeBase authScheme;
+  CookieStore cookieStore;
+  boolean isCookieEnabled;
+  String cookieName;
 
-  public HttpBasicAuthInterceptor(String username, String password) {
+  public HttpBasicAuthInterceptor(String username, String password, CookieStore cookieStore,
+                           String cn) {
     if(username != null){
       credentials = new UsernamePasswordCredentials(username, password);
     }
     authScheme = new BasicScheme();
+    this.cookieStore = cookieStore;
+    isCookieEnabled = (cookieStore != null);
+    cookieName = cn;
   }
 
   @Override
   public void process(HttpRequest httpRequest, HttpContext httpContext)
       throws HttpException, IOException {
-    Header basicAuthHeader = authScheme.authenticate(
-        credentials, httpRequest, httpContext);
-    httpRequest.addHeader(basicAuthHeader);
+    if (isCookieEnabled) {
+      httpContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore);
+    }
+    // Add the authentication details under the following scenarios:
+    // 1. Cookie Authentication is disabled OR
+    // 2. The first time when the request is sent OR
+    // 3. The server returns a 401, which sometimes means the cookie has expired
+    if (!isCookieEnabled || ((httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) == null &&
+        (cookieStore == null || (cookieStore != null &&
+        Utils.needToSendCredentials(cookieStore, cookieName)))) ||
+        (httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) != null &&
+         httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY).
+         equals(Utils.HIVE_SERVER2_RETRY_TRUE)))) {
+      Header basicAuthHeader = authScheme.authenticate(credentials, httpRequest, httpContext);
+      httpRequest.addHeader(basicAuthHeader);
+    }
+    if (isCookieEnabled) {
+      httpContext.setAttribute(Utils.HIVE_SERVER2_RETRY_KEY, Utils.HIVE_SERVER2_RETRY_FALSE);
+    }
   }
-
 }
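
The compound condition added in process() above matches the numbered comment: the auth header is
attached when cookie handling is off, on the first request if the store holds no usable cookie,
or when the 401-retry flag has been set by HiveConnection's retry strategy. An illustrative,
behaviour-equivalent restatement (not part of the patch; it assumes the class sits in
org.apache.hive.jdbc, since the Utils helpers are package-private, and that isCookieEnabled is
derived from a non-null cookieStore as in the constructor above):

    // Hypothetical helper, equivalent to the condition in process():
    static boolean shouldSendCredentials(boolean isCookieEnabled, Object retryFlag,
        org.apache.http.client.CookieStore cookieStore, String cookieName) {
      boolean firstRequest = retryFlag == null;
      boolean retryAfter401 = Utils.HIVE_SERVER2_RETRY_TRUE.equals(retryFlag);
      return !isCookieEnabled
          || (firstRequest && Utils.needToSendCredentials(cookieStore, cookieName))
          || retryAfter401;
    }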

Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpKerberosRequestInterceptor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpKerberosRequestInterceptor.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpKerberosRequestInterceptor.java (original)
+++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/HttpKerberosRequestInterceptor.java Tue Apr 14 23:36:02 2015
@@ -25,6 +25,8 @@ import org.apache.hive.service.auth.Http
 import org.apache.http.HttpException;
 import org.apache.http.HttpRequest;
 import org.apache.http.HttpRequestInterceptor;
+import org.apache.http.client.CookieStore;
+import org.apache.http.client.protocol.ClientContext;
 import org.apache.http.protocol.HttpContext;
 
 /**
@@ -40,31 +42,59 @@ public class HttpKerberosRequestIntercep
   String host;
   String serverHttpUrl;
   boolean assumeSubject;
+  CookieStore cookieStore;
+  boolean isCookieEnabled;
+  String cookieName;
 
   // A fair reentrant lock
   private static ReentrantLock kerberosLock = new ReentrantLock(true);
 
   public HttpKerberosRequestInterceptor(String principal, String host,
-      String serverHttpUrl, boolean assumeSubject) {
+      String serverHttpUrl, boolean assumeSubject, CookieStore cs, String cn) {
     this.principal = principal;
     this.host = host;
     this.serverHttpUrl = serverHttpUrl;
     this.assumeSubject = assumeSubject;
+    this.cookieStore = cs;
+    isCookieEnabled = (cs != null);
+    cookieName = cn;
   }
 
   @Override
   public void process(HttpRequest httpRequest, HttpContext httpContext)
       throws HttpException, IOException {
     String kerberosAuthHeader;
+
     try {
       // Generate the service ticket for sending to the server.
       // Locking ensures the tokens are unique in case of concurrent requests
       kerberosLock.lock();
-      kerberosAuthHeader = HttpAuthUtils.getKerberosServiceTicket(
-          principal, host, serverHttpUrl, assumeSubject);
-      // Set the session key token (Base64 encoded) in the headers
-      httpRequest.addHeader(HttpAuthUtils.AUTHORIZATION + ": " +
-          HttpAuthUtils.NEGOTIATE + " ", kerberosAuthHeader);
+      // If cookie based authentication is allowed, generate ticket only when necessary.
+      // The necessary condition is either when there are no server side cookies in the
+      // cookiestore which can be sent back, or when the server returns a 401 error code
+      // indicating that the previous cookie has expired.
+      if (isCookieEnabled) {
+        httpContext.setAttribute(ClientContext.COOKIE_STORE, cookieStore);
+      }
+      // Generate the kerberos ticket under the following scenarios:
+      // 1. Cookie Authentication is disabled OR
+      // 2. The first time when the request is sent OR
+      // 3. The server returns a 401, which sometimes means the cookie has expired
+      if (!isCookieEnabled || ((httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) == null &&
+          (cookieStore == null || (cookieStore != null &&
+          Utils.needToSendCredentials(cookieStore, cookieName)))) ||
+          (httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY) != null &&
+          httpContext.getAttribute(Utils.HIVE_SERVER2_RETRY_KEY).
+          equals(Utils.HIVE_SERVER2_RETRY_TRUE)))) {
+        kerberosAuthHeader = HttpAuthUtils.getKerberosServiceTicket(
+            principal, host, serverHttpUrl, assumeSubject);
+        // Set the session key token (Base64 encoded) in the headers
+        httpRequest.addHeader(HttpAuthUtils.AUTHORIZATION + ": " +
+            HttpAuthUtils.NEGOTIATE + " ", kerberosAuthHeader);
+      }
+      if (isCookieEnabled) {
+        httpContext.setAttribute(Utils.HIVE_SERVER2_RETRY_KEY, Utils.HIVE_SERVER2_RETRY_FALSE);
+      }
     } catch (Exception e) {
       throw new HttpException(e.getMessage(), e);
     }

Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/JdbcColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/JdbcColumn.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/JdbcColumn.java (original)
+++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/JdbcColumn.java Tue Apr 14 23:36:02 2015
@@ -18,7 +18,10 @@
 
 package org.apache.hive.jdbc;
 
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hive.service.cli.Type;
 
 import java.math.BigInteger;
 import java.sql.Date;
@@ -64,10 +67,12 @@ public class JdbcColumn {
     return type;
   }
 
-  static String columnClassName(int columnType, JdbcColumnAttributes columnAttributes)
+  static String columnClassName(Type hiveType, JdbcColumnAttributes columnAttributes)
       throws SQLException {
-    // according to hiveTypeToSqlType possible options are:
+    int columnType = hiveTypeToSqlType(hiveType);
     switch(columnType) {
+      case Types.NULL:
+        return "null";
       case Types.BOOLEAN:
         return Boolean.class.getName();
       case Types.CHAR:
@@ -93,7 +98,17 @@ public class JdbcColumn {
         return BigInteger.class.getName();
       case Types.BINARY:
         return byte[].class.getName();
-      case Types.JAVA_OBJECT:
+      case Types.OTHER:
+      case Types.JAVA_OBJECT: {
+        switch (hiveType) {
+          case INTERVAL_YEAR_MONTH_TYPE:
+            return HiveIntervalYearMonth.class.getName();
+          case INTERVAL_DAY_TIME_TYPE:
+            return HiveIntervalDayTime.class.getName();
+          default:
+            return String.class.getName();
+        }
+      }
       case Types.ARRAY:
       case Types.STRUCT:
         return String.class.getName();
@@ -102,45 +117,61 @@ public class JdbcColumn {
     }
   }
 
-  public static int hiveTypeToSqlType(String type) throws SQLException {
+  static Type typeStringToHiveType(String type) throws SQLException {
     if ("string".equalsIgnoreCase(type)) {
-      return Types.VARCHAR;
+      return Type.STRING_TYPE;
     } else if ("varchar".equalsIgnoreCase(type)) {
-      return Types.VARCHAR;
+      return Type.VARCHAR_TYPE;
     } else if ("char".equalsIgnoreCase(type)) {
-      return Types.CHAR;
+      return Type.CHAR_TYPE;
     } else if ("float".equalsIgnoreCase(type)) {
-      return Types.FLOAT;
+      return Type.FLOAT_TYPE;
     } else if ("double".equalsIgnoreCase(type)) {
-      return Types.DOUBLE;
+      return Type.DOUBLE_TYPE;
     } else if ("boolean".equalsIgnoreCase(type)) {
-      return Types.BOOLEAN;
+      return Type.BOOLEAN_TYPE;
     } else if ("tinyint".equalsIgnoreCase(type)) {
-      return Types.TINYINT;
+      return Type.TINYINT_TYPE;
     } else if ("smallint".equalsIgnoreCase(type)) {
-      return Types.SMALLINT;
+      return Type.SMALLINT_TYPE;
     } else if ("int".equalsIgnoreCase(type)) {
-      return Types.INTEGER;
+      return Type.INT_TYPE;
     } else if ("bigint".equalsIgnoreCase(type)) {
-      return Types.BIGINT;
+      return Type.BIGINT_TYPE;
     } else if ("date".equalsIgnoreCase(type)) {
-      return Types.DATE;
+      return Type.DATE_TYPE;
     } else if ("timestamp".equalsIgnoreCase(type)) {
-      return Types.TIMESTAMP;
+      return Type.TIMESTAMP_TYPE;
+    } else if ("interval_year_month".equalsIgnoreCase(type)) {
+      return Type.INTERVAL_YEAR_MONTH_TYPE;
+    } else if ("interval_day_time".equalsIgnoreCase(type)) {
+      return Type.INTERVAL_DAY_TIME_TYPE;
     } else if ("decimal".equalsIgnoreCase(type)) {
-      return Types.DECIMAL;
+      return Type.DECIMAL_TYPE;
     } else if ("binary".equalsIgnoreCase(type)) {
-      return Types.BINARY;
+      return Type.BINARY_TYPE;
     } else if ("map".equalsIgnoreCase(type)) {
-      return Types.JAVA_OBJECT;
+      return Type.MAP_TYPE;
     } else if ("array".equalsIgnoreCase(type)) {
-      return Types.ARRAY;
+      return Type.ARRAY_TYPE;
     } else if ("struct".equalsIgnoreCase(type)) {
-      return Types.STRUCT;
+      return Type.STRUCT_TYPE;
     }
     throw new SQLException("Unrecognized column type: " + type);
   }
 
+  public static int hiveTypeToSqlType(Type hiveType) throws SQLException {
+    return hiveType.toJavaSQLType();
+  }
+
+  public static int hiveTypeToSqlType(String type) throws SQLException {
+    if ("void".equalsIgnoreCase(type) || "null".equalsIgnoreCase(type)) {
+      return Types.NULL;
+    } else {
+      return hiveTypeToSqlType(typeStringToHiveType(type));
+    }
+  }
+
   static String getColumnTypeName(String type) throws SQLException {
     // we need to convert the Hive type to the SQL type name
     // TODO: this would be better handled in an enum
@@ -168,11 +199,15 @@ public class JdbcColumn {
       return serdeConstants.TIMESTAMP_TYPE_NAME;
     } else if ("date".equalsIgnoreCase(type)) {
       return serdeConstants.DATE_TYPE_NAME;
+    } else if ("interval_year_month".equalsIgnoreCase(type)) {
+      return serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME;
+    } else if ("interval_day_time".equalsIgnoreCase(type)) {
+      return serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME;
     } else if ("decimal".equalsIgnoreCase(type)) {
       return serdeConstants.DECIMAL_TYPE_NAME;
     } else if ("binary".equalsIgnoreCase(type)) {
       return serdeConstants.BINARY_TYPE_NAME;
-    } else if ("void".equalsIgnoreCase(type)) {
+    } else if ("void".equalsIgnoreCase(type) || "null".equalsIgnoreCase(type)) {
       return serdeConstants.VOID_TYPE_NAME;
     } else if (type.equalsIgnoreCase("map")) {
       return serdeConstants.MAP_TYPE_NAME;
@@ -185,26 +220,27 @@ public class JdbcColumn {
     throw new SQLException("Unrecognized column type: " + type);
   }
 
-  static int columnDisplaySize(int columnType, JdbcColumnAttributes columnAttributes)
+  static int columnDisplaySize(Type hiveType, JdbcColumnAttributes columnAttributes)
       throws SQLException {
     // according to hiveTypeToSqlType possible options are:
+    int columnType = hiveTypeToSqlType(hiveType);
     switch(columnType) {
     case Types.BOOLEAN:
-      return columnPrecision(columnType, columnAttributes);
+      return columnPrecision(hiveType, columnAttributes);
     case Types.CHAR:
     case Types.VARCHAR:
-      return columnPrecision(columnType, columnAttributes);
+      return columnPrecision(hiveType, columnAttributes);
     case Types.BINARY:
       return Integer.MAX_VALUE; // hive has no max limit for binary
     case Types.TINYINT:
     case Types.SMALLINT:
     case Types.INTEGER:
     case Types.BIGINT:
-      return columnPrecision(columnType, columnAttributes) + 1; // allow +/-
+      return columnPrecision(hiveType, columnAttributes) + 1; // allow +/-
     case Types.DATE:
       return 10;
     case Types.TIMESTAMP:
-      return columnPrecision(columnType, columnAttributes);
+      return columnPrecision(hiveType, columnAttributes);
 
     // see http://download.oracle.com/javase/6/docs/api/constant-values.html#java.lang.Float.MAX_EXPONENT
     case Types.FLOAT:
@@ -213,8 +249,10 @@ public class JdbcColumn {
     case Types.DOUBLE:
       return 25; // e.g. -(17#).e-####
     case Types.DECIMAL:
-      return columnPrecision(columnType, columnAttributes) + 2;  // '-' sign and '.'
+      return columnPrecision(hiveType, columnAttributes) + 2;  // '-' sign and '.'
+    case Types.OTHER:
     case Types.JAVA_OBJECT:
+      return columnPrecision(hiveType, columnAttributes);
     case Types.ARRAY:
     case Types.STRUCT:
       return Integer.MAX_VALUE;
@@ -223,8 +261,9 @@ public class JdbcColumn {
     }
   }
 
-  static int columnPrecision(int columnType, JdbcColumnAttributes columnAttributes)
+  static int columnPrecision(Type hiveType, JdbcColumnAttributes columnAttributes)
       throws SQLException {
+    int columnType = hiveTypeToSqlType(hiveType);
     // according to hiveTypeToSqlType possible options are:
     switch(columnType) {
     case Types.BOOLEAN:
@@ -255,7 +294,19 @@ public class JdbcColumn {
       return 29;
     case Types.DECIMAL:
       return columnAttributes.precision;
-    case Types.JAVA_OBJECT:
+    case Types.OTHER:
+    case Types.JAVA_OBJECT: {
+      switch (hiveType) {
+        case INTERVAL_YEAR_MONTH_TYPE:
+          // -yyyyyyy-mm  : should be more than enough
+          return 11;
+        case INTERVAL_DAY_TIME_TYPE:
+          // -ddddddddd hh:mm:ss.nnnnnnnnn
+          return 29;
+        default:
+          return Integer.MAX_VALUE;
+      }
+    }
     case Types.ARRAY:
     case Types.STRUCT:
       return Integer.MAX_VALUE;
@@ -264,8 +315,9 @@ public class JdbcColumn {
     }
   }
 
-  static int columnScale(int columnType, JdbcColumnAttributes columnAttributes)
+  static int columnScale(Type hiveType, JdbcColumnAttributes columnAttributes)
       throws SQLException {
+    int columnType = hiveTypeToSqlType(hiveType);
     // according to hiveTypeToSqlType possible options are:
     switch(columnType) {
     case Types.BOOLEAN:
@@ -286,6 +338,7 @@ public class JdbcColumn {
       return 9;
     case Types.DECIMAL:
       return columnAttributes.scale;
+    case Types.OTHER:
     case Types.JAVA_OBJECT:
     case Types.ARRAY:
     case Types.STRUCT:
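
The mapping is now two-step: the column type string is resolved to a cli Type first, and
Type.toJavaSQLType() supplies the java.sql.Types constant, which lets interval columns be told
apart from other OTHER/JAVA_OBJECT columns. A hedged sketch (the helpers are package-private, so
the class is assumed to live in org.apache.hive.jdbc):

    package org.apache.hive.jdbc;

    import java.sql.Types;
    import org.apache.hive.service.cli.Type;

    public class TypeMappingSketch {
      public static void main(String[] args) throws Exception {
        Type hiveType = JdbcColumn.typeStringToHiveType("interval_year_month");
        int sqlType = JdbcColumn.hiveTypeToSqlType(hiveType); // delegates to Type.toJavaSQLType()
        System.out.println(hiveType + " -> java.sql.Types value " + sqlType);
        // "void"/"null" short-circuit to Types.NULL before reaching the enum:
        System.out.println(JdbcColumn.hiveTypeToSqlType("void") == Types.NULL);
      }
    }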

Modified: hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java (original)
+++ hive/branches/spark/jdbc/src/java/org/apache/hive/jdbc/Utils.java Tue Apr 14 23:36:02 2015
@@ -34,6 +34,8 @@ import org.apache.commons.logging.LogFac
 import org.apache.hive.service.cli.HiveSQLException;
 import org.apache.hive.service.cli.thrift.TStatus;
 import org.apache.hive.service.cli.thrift.TStatusCode;
+import org.apache.http.client.CookieStore;
+import org.apache.http.cookie.Cookie;
 
 public class Utils {
   public static final Log LOG = LogFactory.getLog(Utils.class.getName());
@@ -56,6 +58,11 @@ public class Utils {
 
   private static final String URI_HIVE_PREFIX = "hive2:";
 
+  // This value is set to true by the setServiceUnavailableRetryStrategy() when the server returns 401
+  static final String HIVE_SERVER2_RETRY_KEY = "hive.server2.retryserver";
+  static final String HIVE_SERVER2_RETRY_TRUE = "true";
+  static final String HIVE_SERVER2_RETRY_FALSE = "false";
+
   public static class JdbcConnectionParams {
     // Note on client side parameter naming convention:
     // Prefer using a shorter camelCase param name instead of using the same name as the
@@ -98,6 +105,11 @@ public class Utils {
     // Default namespace value on ZooKeeper.
     // This value is used if the param "zooKeeperNamespace" is not specified in the JDBC Uri.
     static final String ZOOKEEPER_DEFAULT_NAMESPACE = "hiveserver2";
+    static final String COOKIE_AUTH = "cookieAuth";
+    static final String COOKIE_AUTH_FALSE = "false";
+    static final String COOKIE_NAME = "cookieName";
+    // The default value of the cookie name when CookieAuth=true
+    static final String DEFAULT_COOKIE_NAMES_HS2 = "hive.server2.auth";
 
     // Non-configurable params:
     // Currently supports JKS keystore format
@@ -560,4 +572,28 @@ public class Utils {
     }
     return version;
   }
+
+  /**
+   * The function iterates through the list of cookies in the cookiestore and tries to
+   * match them with the cookieName. If there is a match, the cookieStore already
+   * has a valid cookie and the client need not send Credentials for validation purpose.
+   * @param cookieStore The cookie Store
+   * @param cookieName Name of the cookie which needs to be validated
+   * @return true or false based on whether the client needs to send the credentials or
+   * not to the server.
+   */
+  static boolean needToSendCredentials(CookieStore cookieStore, String cookieName) {
+    if (cookieName == null || cookieStore == null) {
+      return true;
+    }
+
+    List<Cookie> cookies = cookieStore.getCookies();
+
+    for (Cookie c : cookies) {
+      if (c.getName().equals(cookieName)) {
+        return false;
+      }
+    }
+    return true;
+  }
 }

Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Tue Apr 14 23:36:02 2015
@@ -5487,7 +5487,7 @@ public class HiveMetaStore extends Thrif
         ex = e;
         throw newMetaException(e);
       } finally {
-        endFunction("get_database", func != null, ex);
+        endFunction("get_function", func != null, ex);
       }
 
       return func;
@@ -6060,6 +6060,11 @@ public class HiveMetaStore extends Thrif
         // don't doom the rest of the metastore.
         startLock.lock();
         try {
+          startPauseMonitor(conf);
+        } catch (Throwable t) {
+          LOG.warn("Error starting the JVM pause monitor", t);
+        }
+        try {
           // Per the javadocs on Condition, do not depend on the condition alone as a start gate
           // since spurious wake ups are possible.
           while (!startedServing.get()) startCondition.await();
@@ -6078,6 +6083,18 @@ public class HiveMetaStore extends Thrif
     t.start();
   }
 
+  private static void startPauseMonitor(HiveConf conf) throws Exception {
+    try {
+      Class.forName("org.apache.hadoop.util.JvmPauseMonitor");
+      org.apache.hadoop.util.JvmPauseMonitor pauseMonitor =
+        new org.apache.hadoop.util.JvmPauseMonitor(conf);
+      pauseMonitor.start();
+    } catch (Throwable t) {
+      LOG.warn("Could not initiate the JvmPauseMonitor thread." +
+               " GCs and Pauses may not be warned upon.", t);
+    }
+  }
+
   private static void startCompactorInitiator(HiveConf conf) throws Exception {
     if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_INITIATOR_ON)) {
       MetaStoreThread initiator =

Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IExtrapolatePartStatus.java Tue Apr 14 23:36:02 2015
@@ -25,23 +25,33 @@ public interface IExtrapolatePartStatus
   /**
    * The sequence of colStatNames.
    */
-  static String[] colStatNames = new String[] { "LONG_LOW_VALUE",
-      "LONG_HIGH_VALUE", "DOUBLE_LOW_VALUE", "DOUBLE_HIGH_VALUE",
-      "BIG_DECIMAL_LOW_VALUE", "BIG_DECIMAL_HIGH_VALUE", "NUM_NULLS",
-      "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES" };
-  
+  static String[] colStatNames = new String[] { "LONG_LOW_VALUE", "LONG_HIGH_VALUE",
+      "DOUBLE_LOW_VALUE", "DOUBLE_HIGH_VALUE", "BIG_DECIMAL_LOW_VALUE", "BIG_DECIMAL_HIGH_VALUE",
+      "NUM_NULLS", "NUM_DISTINCTS", "AVG_COL_LEN", "MAX_COL_LEN", "NUM_TRUES", "NUM_FALSES",
+      "AVG_NDV_LONG", "AVG_NDV_DOUBLE", "AVG_NDV_DECIMAL", "SUM_NUM_DISTINCTS" };
+
   /**
    * The indexes for colstats.
    */
-  static HashMap<String, Integer[]> indexMaps = new HashMap<String, Integer []>(){{
-    put("long", new Integer [] {0,1,6,7});
-    put("double", new Integer [] {2,3,6,7});
-    put("string", new Integer [] {8,9,6,7});
-    put("boolean", new Integer [] {10,11,6});
-    put("binary", new Integer [] {8,9,6});
-    put("decimal", new Integer [] {4,5,6,7});
-    put("default", new Integer [] {0,1,2,3,4,5,6,7,8,9,10,11});
-}};
+  static HashMap<String, Integer[]> indexMaps = new HashMap<String, Integer[]>() {
+    {
+      put("bigint", new Integer[] { 0, 1, 6, 7, 12, 15 });
+      put("int", new Integer[] { 0, 1, 6, 7, 12, 15 });
+      put("smallint", new Integer[] { 0, 1, 6, 7, 12, 15 });
+      put("tinyint", new Integer[] { 0, 1, 6, 7, 12, 15 });
+      put("timestamp", new Integer[] { 0, 1, 6, 7, 12, 15 });
+      put("long", new Integer[] { 0, 1, 6, 7, 12, 15 });
+      put("double", new Integer[] { 2, 3, 6, 7, 13, 15 });
+      put("float", new Integer[] { 2, 3, 6, 7, 13, 15 });
+      put("varchar", new Integer[] { 8, 9, 6, 7, 15 });
+      put("char", new Integer[] { 8, 9, 6, 7, 15 });
+      put("string", new Integer[] { 8, 9, 6, 7, 15 });
+      put("boolean", new Integer[] { 10, 11, 6, 15 });
+      put("binary", new Integer[] { 8, 9, 6, 15 });
+      put("decimal", new Integer[] { 4, 5, 6, 7, 14, 15 });
+      put("default", new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15 });
+    }
+  };
 
   /**
    * The sequence of colStatTypes.
@@ -50,23 +60,24 @@ public interface IExtrapolatePartStatus
     Long, Double, Decimal
   }
 
-  static ColStatType[] colStatTypes = new ColStatType[] { ColStatType.Long,
-      ColStatType.Long, ColStatType.Double, ColStatType.Double,
-      ColStatType.Decimal, ColStatType.Decimal, ColStatType.Long,
-      ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long,
+  static ColStatType[] colStatTypes = new ColStatType[] { ColStatType.Long, ColStatType.Long,
+      ColStatType.Double, ColStatType.Double, ColStatType.Decimal, ColStatType.Decimal,
+      ColStatType.Long, ColStatType.Long, ColStatType.Double, ColStatType.Long, ColStatType.Long,
+      ColStatType.Long, ColStatType.Double, ColStatType.Double, ColStatType.Double,
       ColStatType.Long };
 
   /**
    * The sequence of aggregation function on colStats.
    */
   static enum AggrType {
-    Min, Max, Sum
+    Min, Max, Sum, Avg
   }
 
-  static AggrType[] aggrTypes = new AggrType[] { AggrType.Min, AggrType.Max,
-      AggrType.Min, AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Sum,
-      AggrType.Max, AggrType.Max, AggrType.Max, AggrType.Sum, AggrType.Sum };
-  
+  static AggrType[] aggrTypes = new AggrType[] { AggrType.Min, AggrType.Max, AggrType.Min,
+      AggrType.Max, AggrType.Min, AggrType.Max, AggrType.Sum, AggrType.Max, AggrType.Max,
+      AggrType.Max, AggrType.Sum, AggrType.Sum, AggrType.Avg, AggrType.Avg, AggrType.Avg,
+      AggrType.Sum };
+
   public Object extrapolate(Object[] min, Object[] max, int colStatIndex,
       Map<String, Integer> indexMap);
 

Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/LinearExtrapolatePartStatus.java Tue Apr 14 23:36:02 2015
@@ -19,11 +19,8 @@
 package org.apache.hadoop.hive.metastore;
 
 import java.math.BigDecimal;
-import java.nio.ByteBuffer;
 import java.util.Map;
 
-import org.apache.hadoop.hive.metastore.api.Decimal;
-
 public class LinearExtrapolatePartStatus implements IExtrapolatePartStatus {
 
   @Override
@@ -35,6 +32,15 @@ public class LinearExtrapolatePartStatus
     if (minInd == maxInd) {
       return min[0];
     }
+    // Note that recent metastore versions store decimal values as strings.
+    double decimalmin= 0;
+    double decimalmax = 0;
+    if (colStatTypes[colStatIndex] == ColStatType.Decimal) {
+      BigDecimal bdmin = new BigDecimal(min[0].toString());
+      decimalmin = bdmin.doubleValue();
+      BigDecimal bdmax = new BigDecimal(max[0].toString());
+      decimalmax = bdmax.doubleValue();
+    }
     if (aggrTypes[colStatIndex] == AggrType.Max) {
       if (minInd < maxInd) {
         // right border is the max
@@ -45,15 +51,9 @@ public class LinearExtrapolatePartStatus
           return (Double) ((Double) min[0] + (((Double) max[0] - (Double) min[0])
               * (rightBorderInd - minInd) / (maxInd - minInd)));
         } else {
-          Decimal dmax = (Decimal) max[0];
-          BigDecimal bdmax = new BigDecimal(dmax.toString());
-          double doublemax = bdmax.doubleValue();
-          Decimal dmin = (Decimal) min[0];
-          BigDecimal bdmin = new BigDecimal(dmin.toString());
-          double doublemin = bdmin.doubleValue();
-          double ret = doublemin + (doublemax - doublemin)
+          double ret = decimalmin + (decimalmax - decimalmin)
               * (rightBorderInd - minInd) / (maxInd - minInd);
-          return createThriftDecimal(String.valueOf(ret));
+          return String.valueOf(ret);
         }
       } else {
         // left border is the max
@@ -62,17 +62,11 @@ public class LinearExtrapolatePartStatus
               * minInd / (minInd - maxInd));
         } else if (colStatTypes[colStatIndex] == ColStatType.Double) {
           return (Double) ((Double) min[0] + ((Double) max[0] - (Double) min[0])
-              * minInd / (maxInd - minInd));
+              * minInd / (minInd - maxInd));
         } else {
-          Decimal dmax = (Decimal) max[0];
-          BigDecimal bdmax = new BigDecimal(dmax.toString());
-          double doublemax = bdmax.doubleValue();
-          Decimal dmin = (Decimal) min[0];
-          BigDecimal bdmin = new BigDecimal(dmin.toString());
-          double doublemin = bdmin.doubleValue();
-          double ret = doublemin + (doublemax - doublemin) * minInd
-              / (maxInd - minInd);
-          return createThriftDecimal(String.valueOf(ret));
+          double ret = decimalmin + (decimalmax - decimalmin) * minInd
+              / (minInd - maxInd);
+          return String.valueOf(ret);
         }
       }
     } else {
@@ -87,16 +81,9 @@ public class LinearExtrapolatePartStatus
               * maxInd / (maxInd - minInd);
           return ret;
         } else {
-          Decimal dmax = (Decimal) max[0];
-          BigDecimal bdmax = new BigDecimal(dmax.toString());
-          double doublemax = bdmax.doubleValue();
-          Decimal dmin = (Decimal) min[0];
-          BigDecimal bdmin = new BigDecimal(dmin.toString());
-          double doublemin = bdmin.doubleValue();
-          double ret = doublemax - (doublemax - doublemin) * maxInd
+          double ret = decimalmax - (decimalmax - decimalmin) * maxInd
               / (maxInd - minInd);
-          return createThriftDecimal(String.valueOf(ret));
-
+          return String.valueOf(ret);
         }
       } else {
         // right border is the min
@@ -109,24 +96,11 @@ public class LinearExtrapolatePartStatus
               * (rightBorderInd - maxInd) / (minInd - maxInd);
           return ret;
         } else {
-          Decimal dmax = (Decimal) max[0];
-          BigDecimal bdmax = new BigDecimal(dmax.toString());
-          double doublemax = bdmax.doubleValue();
-          Decimal dmin = (Decimal) min[0];
-          BigDecimal bdmin = new BigDecimal(dmin.toString());
-          double doublemin = bdmin.doubleValue();
-          double ret = doublemax - (doublemax - doublemin)
+          double ret = decimalmax - (decimalmax - decimalmin)
               * (rightBorderInd - maxInd) / (minInd - maxInd);
-          return createThriftDecimal(String.valueOf(ret));
+          return String.valueOf(ret);
         }
       }
     }
   }
-
-  private static Decimal createThriftDecimal(String s) {
-    BigDecimal d = new BigDecimal(s);
-    return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()),
-        (short) d.scale());
-  }
-
 }
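
The extrapolation draws a straight line through the partitions holding the observed min and max
values and evaluates it at the table border; the sign change above (minInd - maxInd instead of
maxInd - minInd) fixes the left-border Double case so it mirrors the Long branch. A small worked
illustration of the right-border Max branch (numbers are invented):

    public class ExtrapolationSketch {
      public static void main(String[] args) {
        double min = 10.0;          // high value of the partition with the smallest high value
        double max = 40.0;          // high value of the partition with the largest high value
        int minInd = 1, maxInd = 4; // positions of those partitions in the partition ordering
        int rightBorderInd = 5;     // one past the last partition
        double extrapolated = min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd);
        System.out.println(extrapolated); // 10 + 30 * 4 / 3 = 50.0
      }
    }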

Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Tue Apr 14 23:36:02 2015
@@ -803,6 +803,15 @@ class MetaStoreDirectSql {
     if (value == null) return null;
     return value.toString();
   }
+  
+  static Double extractSqlDouble(Object obj) throws MetaException {
+    if (obj == null)
+      return null;
+    if (!(obj instanceof Number)) {
+      throw new MetaException("Expected numeric type but got " + obj.getClass().getName());
+    }
+    return ((Number) obj).doubleValue();
+  }
 
   private static String trimCommaList(StringBuilder sb) {
     if (sb.length() > 0) {
@@ -852,6 +861,7 @@ class MetaStoreDirectSql {
         func.apply(entry.getValue(), fields);
         fields = null;
       }
+      Deadline.checkTimeout();
     }
     int rv = list.size();
     query.closeAll();
@@ -1081,10 +1091,13 @@ class MetaStoreDirectSql {
   }
 
   public AggrStats aggrColStatsForPartitions(String dbName, String tableName,
-      List<String> partNames, List<String> colNames) throws MetaException {
+      List<String> partNames, List<String> colNames, boolean useDensityFunctionForNDVEstimation) throws MetaException {
     long partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames);
     List<ColumnStatisticsObj> stats = columnStatisticsObjForPartitions(dbName,
-        tableName, partNames, colNames, partsFound);
+        tableName, partNames, colNames, partsFound, useDensityFunctionForNDVEstimation);
+    LOG.info("useDensityFunctionForNDVEstimation = " + useDensityFunctionForNDVEstimation
+        + "\npartsFound = " + partsFound + "\nColumnStatisticsObj = "
+        + Arrays.toString(stats.toArray()));
     return new AggrStats(stats, partsFound);
   }
 
@@ -1113,15 +1126,33 @@ class MetaStoreDirectSql {
     return partsFound;
   }
 
-  private List<ColumnStatisticsObj> columnStatisticsObjForPartitions(
-      String dbName, String tableName, List<String> partNames,
-      List<String> colNames, long partsFound) throws MetaException {
+  private List<ColumnStatisticsObj> columnStatisticsObjForPartitions(String dbName,
+      String tableName, List<String> partNames, List<String> colNames, long partsFound, boolean useDensityFunctionForNDVEstimation)
+      throws MetaException {
     // TODO: all the extrapolation logic should be moved out of this class,
-    //       only mechanical data retrieval should remain here.
+    // only mechanical data retrieval should remain here.
     String commonPrefix = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
         + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
-        + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
-        + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
+        + "min(cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal)), max(cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)), "
+        + "sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
+        + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), "
+        // The following data is used to compute a partitioned table's NDV based
+        // on partitions' NDV when useDensityFunctionForNDVEstimation = true. Global NDVs cannot be
+        // accurately derived from partition NDVs, because the domains of column values in two
+        // partitions can overlap. If there is no overlap, the global NDV is just the sum
+        // of partition NDVs (UpperBound). But if there is some overlap, the
+        // global NDV can be anywhere between the sum of partition NDVs (no overlap)
+        // and the largest single-partition NDV (the column's value domain in every other
+        // partition is a subset of that partition's domain)
+        // (LowerBound). But under a uniform distribution, we can roughly estimate the global
+        // NDV by leveraging the min/max values.
+        // And, we also guarantee that the estimation makes sense by comparing it to the
+        // UpperBound (calculated by "sum(\"NUM_DISTINCTS\")")
+        // and LowerBound (calculated by "max(\"NUM_DISTINCTS\")")
+        + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal)),"
+        + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\"),"
+        + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\"),"
+        + "sum(\"NUM_DISTINCTS\")" + " from \"PART_COL_STATS\""
         + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? ";
     String queryText = null;
     long start = 0;
@@ -1133,14 +1164,13 @@ class MetaStoreDirectSql {
     // Check if the status of all the columns of all the partitions exists
     // Extrapolation is not needed.
     if (partsFound == partNames.size()) {
-      queryText = commonPrefix
-          + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")"
+      queryText = commonPrefix + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")"
           + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
           + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
       start = doTrace ? System.nanoTime() : 0;
       query = pm.newQuery("javax.jdo.query.SQL", queryText);
-      qResult = executeWithArray(query, prepareParams(
-          dbName, tableName, partNames, colNames), queryText);
+      qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames),
+          queryText);
       if (qResult == null) {
         query.closeAll();
         return Lists.newArrayList();
@@ -1148,10 +1178,10 @@ class MetaStoreDirectSql {
       end = doTrace ? System.nanoTime() : 0;
       timingTrace(doTrace, queryText, start, end);
       List<Object[]> list = ensureList(qResult);
-      List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(
-          list.size());
+      List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
       for (Object[] row : list) {
-        colStats.add(prepareCSObj(row, 0));
+        colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
+        Deadline.checkTimeout();
       }
       query.closeAll();
       return colStats;
@@ -1159,18 +1189,16 @@ class MetaStoreDirectSql {
       // Extrapolation is needed for some columns.
       // In this case, at least a column status for a partition is missing.
       // We need to extrapolate this partition based on the other partitions
-      List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(
-          colNames.size());
+      List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size());
       queryText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", count(\"PARTITION_NAME\") "
-          + " from \"PART_COL_STATS\""
-          + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
+          + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
           + " and \"COLUMN_NAME\" in (" + makeParams(colNames.size()) + ")"
           + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
           + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
       start = doTrace ? System.nanoTime() : 0;
       query = pm.newQuery("javax.jdo.query.SQL", queryText);
-      qResult = executeWithArray(query, prepareParams(
-          dbName, tableName, partNames, colNames), queryText);
+      qResult = executeWithArray(query, prepareParams(dbName, tableName, partNames, colNames),
+          queryText);
       end = doTrace ? System.nanoTime() : 0;
       timingTrace(doTrace, queryText, start, end);
       if (qResult == null) {
@@ -1193,25 +1221,26 @@ class MetaStoreDirectSql {
         } else {
           extraColumnNameTypeParts.put(colName, new String[] { colType, String.valueOf(count) });
         }
+        Deadline.checkTimeout();
       }
       query.closeAll();
       // Extrapolation is not needed for columns noExtraColumnNames
       if (noExtraColumnNames.size() != 0) {
-        queryText = commonPrefix
-            + " and \"COLUMN_NAME\" in ("+ makeParams(noExtraColumnNames.size()) + ")"
-            + " and \"PARTITION_NAME\" in ("+ makeParams(partNames.size()) +")"
-            + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+        queryText = commonPrefix + " and \"COLUMN_NAME\" in ("
+            + makeParams(noExtraColumnNames.size()) + ")" + " and \"PARTITION_NAME\" in ("
+            + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
         start = doTrace ? System.nanoTime() : 0;
         query = pm.newQuery("javax.jdo.query.SQL", queryText);
-        qResult = executeWithArray(query, prepareParams(
-            dbName, tableName, partNames, noExtraColumnNames), queryText);
+        qResult = executeWithArray(query,
+            prepareParams(dbName, tableName, partNames, noExtraColumnNames), queryText);
         if (qResult == null) {
           query.closeAll();
           return Lists.newArrayList();
         }
         list = ensureList(qResult);
         for (Object[] row : list) {
-          colStats.add(prepareCSObj(row, 0));
+          colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
+          Deadline.checkTimeout();
         }
         end = doTrace ? System.nanoTime() : 0;
         timingTrace(doTrace, queryText, start, end);
@@ -1226,37 +1255,42 @@ class MetaStoreDirectSql {
         }
         // get sum for all columns to reduce the number of queries
         Map<String, Map<Integer, Object>> sumMap = new HashMap<String, Map<Integer, Object>>();
-        queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\")"
+        queryText = "select \"COLUMN_NAME\", sum(\"NUM_NULLS\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\"), sum(\"NUM_DISTINCTS\")"
             + " from \"PART_COL_STATS\""
             + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
-            + " and \"COLUMN_NAME\" in (" +makeParams(extraColumnNameTypeParts.size())+ ")"
-            + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+            + " and \"COLUMN_NAME\" in ("
+            + makeParams(extraColumnNameTypeParts.size())
+            + ")"
+            + " and \"PARTITION_NAME\" in ("
+            + makeParams(partNames.size())
+            + ")"
             + " group by \"COLUMN_NAME\"";
         start = doTrace ? System.nanoTime() : 0;
         query = pm.newQuery("javax.jdo.query.SQL", queryText);
         List<String> extraColumnNames = new ArrayList<String>();
         extraColumnNames.addAll(extraColumnNameTypeParts.keySet());
-        qResult = executeWithArray(query, prepareParams(
-            dbName, tableName, partNames, extraColumnNames), queryText);
+        qResult = executeWithArray(query,
+            prepareParams(dbName, tableName, partNames, extraColumnNames), queryText);
         if (qResult == null) {
           query.closeAll();
           return Lists.newArrayList();
         }
         list = ensureList(qResult);
         // see the indexes for colstats in IExtrapolatePartStatus
-        Integer[] sumIndex = new Integer[] { 6, 10, 11 };
+        Integer[] sumIndex = new Integer[] { 6, 10, 11, 15 };
         for (Object[] row : list) {
           Map<Integer, Object> indexToObject = new HashMap<Integer, Object>();
           for (int ind = 1; ind < row.length; ind++) {
             indexToObject.put(sumIndex[ind - 1], row[ind]);
           }
+          // row[0] is the column name
           sumMap.put((String) row[0], indexToObject);
+          Deadline.checkTimeout();
         }
         end = doTrace ? System.nanoTime() : 0;
         timingTrace(doTrace, queryText, start, end);
         query.closeAll();
-        for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts
-            .entrySet()) {
+        for (Map.Entry<String, String[]> entry : extraColumnNameTypeParts.entrySet()) {
           Object[] row = new Object[IExtrapolatePartStatus.colStatNames.length + 2];
           String colName = entry.getKey();
           String colType = entry.getValue()[0];
@@ -1265,12 +1299,20 @@ class MetaStoreDirectSql {
           row[0] = colName;
           // fill in coltype
           row[1] = colType;
-          // use linear extrapolation. more complicated one can be added in the future.
+          // use linear extrapolation; more sophisticated methods can be added
+          // in the future.
           IExtrapolatePartStatus extrapolateMethod = new LinearExtrapolatePartStatus();
           // fill in colstatus
-          Integer[] index = IExtrapolatePartStatus.indexMaps.get(colType
-              .toLowerCase());
-          //if the colType is not the known type, long, double, etc, then get all index.
+          Integer[] index = null;
+          boolean decimal = false;
+          if (colType.toLowerCase().startsWith("decimal")) {
+            index = IExtrapolatePartStatus.indexMaps.get("decimal");
+            decimal = true;
+          } else {
+            index = IExtrapolatePartStatus.indexMaps.get(colType.toLowerCase());
+          }
+          // if the colType is not a known type (long, double, etc.), fall back
+          // to the full index set.
           if (index == null) {
             index = IExtrapolatePartStatus.indexMaps.get("default");
           }
@@ -1285,20 +1327,27 @@ class MetaStoreDirectSql {
                 Long val = extractSqlLong(o);
                 row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size()));
               }
-            } else {
+            } else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min
+                || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) {
               // if the aggregation type is min/max, we extrapolate from the
               // left/right borders
-              queryText = "select \""
-                  + colStatName
-                  + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\""
-                  + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
-                  + " and \"COLUMN_NAME\" = ?"
-                  + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
-                  + " order by \'" + colStatName + "\'";
+              if (!decimal) {
+                queryText = "select \"" + colStatName
+                    + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\""
+                    + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?"
+                    + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+                    + " order by \"" + colStatName + "\"";
+              } else {
+                queryText = "select \"" + colStatName
+                    + "\",\"PARTITION_NAME\" from \"PART_COL_STATS\""
+                    + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?" + " and \"COLUMN_NAME\" = ?"
+                    + " and \"PARTITION_NAME\" in (" + makeParams(partNames.size()) + ")"
+                    + " order by cast(\"" + colStatName + "\" as decimal)";
+              }
               start = doTrace ? System.nanoTime() : 0;
               query = pm.newQuery("javax.jdo.query.SQL", queryText);
-              qResult = executeWithArray(query, prepareParams(
-                  dbName, tableName, partNames, Arrays.asList(colName)), queryText);
+              qResult = executeWithArray(query,
+                  prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
               if (qResult == null) {
                 query.closeAll();
                 return Lists.newArrayList();
@@ -1312,12 +1361,39 @@ class MetaStoreDirectSql {
               if (min[0] == null || max[0] == null) {
                 row[2 + colStatIndex] = null;
               } else {
-                row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max,
-                    colStatIndex, indexMap);
+                row[2 + colStatIndex] = extrapolateMethod.extrapolate(min, max, colStatIndex,
+                    indexMap);
               }
+            } else {
+              // if the aggregation type is avg, we take the average over the
+              // existing partitions.
+              queryText = "select "
+                  + "avg((\"LONG_HIGH_VALUE\"-\"LONG_LOW_VALUE\")/cast(\"NUM_DISTINCTS\" as decimal)),"
+                  + "avg((\"DOUBLE_HIGH_VALUE\"-\"DOUBLE_LOW_VALUE\")/\"NUM_DISTINCTS\"),"
+                  + "avg((cast(\"BIG_DECIMAL_HIGH_VALUE\" as decimal)-cast(\"BIG_DECIMAL_LOW_VALUE\" as decimal))/\"NUM_DISTINCTS\")"
+                  + " from \"PART_COL_STATS\"" + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
+                  + " and \"COLUMN_NAME\" = ?" + " and \"PARTITION_NAME\" in ("
+                  + makeParams(partNames.size()) + ")" + " group by \"COLUMN_NAME\"";
+              start = doTrace ? System.nanoTime() : 0;
+              query = pm.newQuery("javax.jdo.query.SQL", queryText);
+              qResult = executeWithArray(query,
+                  prepareParams(dbName, tableName, partNames, Arrays.asList(colName)), queryText);
+              if (qResult == null) {
+                query.closeAll();
+                return Lists.newArrayList();
+              }
+              fqr = (ForwardQueryResult) qResult;
+              Object[] avg = (Object[]) (fqr.get(0));
+              // colStatIndex = 12, 13, 14 correspond to "AVG_LONG", "AVG_DOUBLE",
+              // "AVG_DECIMAL"
+              row[2 + colStatIndex] = avg[colStatIndex - 12];
+              end = doTrace ? System.nanoTime() : 0;
+              timingTrace(doTrace, queryText, start, end);
+              query.closeAll();
             }
           }
-          colStats.add(prepareCSObj(row, 0));
+          colStats.add(prepareCSObjWithAdjustedNDV(row, 0, useDensityFunctionForNDVEstimation));
+          Deadline.checkTimeout();
         }
       }
       return colStats;
@@ -1335,6 +1411,17 @@ class MetaStoreDirectSql {
     return cso;
   }
 
+  private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i,
+      boolean useDensityFunctionForNDVEstimation) throws MetaException {
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    ColumnStatisticsObj cso = new ColumnStatisticsObj((String) row[i++], (String) row[i++], data);
+    Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++], declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++], avgLong = row[i++], avgDouble = row[i++], avgDecimal = row[i++], sumDist = row[i++];
+    StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, llow, lhigh, dlow, dhigh,
+        declow, dechigh, nulls, dist, avglen, maxlen, trues, falses, avgLong, avgDouble,
+        avgDecimal, sumDist, useDensityFunctionForNDVEstimation);
+    return cso;
+  }
+  
   private Object[] prepareParams(String dbName, String tableName, List<String> partNames,
     List<String> colNames) throws MetaException {
 
@@ -1389,6 +1476,7 @@ class MetaStoreDirectSql {
       }
       lastPartName = partName;
       from = i;
+      Deadline.checkTimeout();
     }
 
     timingTrace(doTrace, queryText, start, queryTime);
@@ -1416,6 +1504,7 @@ class MetaStoreDirectSql {
         csd.setLastAnalyzed(extractSqlLong(laObj));
       }
       csos.add(prepareCSObj(row, offset));
+      Deadline.checkTimeout();
     }
     result.setStatsObj(csos);
     return result;

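The density-based NDV estimation that the new query columns feed can be summarized as: estimate the global NDV as (high - low) / averageDensity and clamp it between the largest single-partition NDV and the sum of the partition NDVs. A minimal sketch of that calculation (the method and variable names are illustrative, not part of the patch):

    // Estimate a table-level NDV from partition-level stats, assuming a roughly
    // uniform value distribution. high/low are the table-wide extremes, avgDensity
    // is the average of (high - low) / NUM_DISTINCTS over the partitions, and
    // maxPartNdv / sumPartNdv are max("NUM_DISTINCTS") and sum("NUM_DISTINCTS").
    static long estimateNdv(double high, double low, double avgDensity,
        long maxPartNdv, long sumPartNdv) {
      if (avgDensity == 0.0) {
        return maxPartNdv;                 // no density information: use the lower bound
      }
      long estimate = (long) ((high - low) / avgDensity);
      if (estimate < maxPartNdv) {
        return maxPartNdv;                 // never below the largest partition NDV
      }
      if (estimate > sumPartNdv) {
        return sumPartNdv;                 // never above the sum of partition NDVs
      }
      return estimate;
    }

For example, with low = 0, high = 1000, avgDensity = 2.5, maxPartNdv = 300 and sumPartNdv = 500, the raw estimate is 400, which falls inside [300, 500] and is kept as-is.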
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java Tue Apr 14 23:36:02 2015
@@ -620,10 +620,10 @@ public class MetaStoreUtils {
    */
   static public boolean validateColumnType(String type) {
     int last = 0;
-    boolean lastAlphaDigit = Character.isLetterOrDigit(type.charAt(last));
+    boolean lastAlphaDigit = isValidTypeChar(type.charAt(last));
     for (int i = 1; i <= type.length(); i++) {
       if (i == type.length()
-          || Character.isLetterOrDigit(type.charAt(i)) != lastAlphaDigit) {
+          || isValidTypeChar(type.charAt(i)) != lastAlphaDigit) {
         String token = type.substring(last, i);
         last = i;
         if (!hiveThriftTypeMap.contains(token)) {
@@ -635,6 +635,10 @@ public class MetaStoreUtils {
     return true;
   }
 
+  private static boolean isValidTypeChar(char c) {
+    return Character.isLetterOrDigit(c) || c == '_';
+  }
+
   public static String validateSkewedColNames(List<String> cols) {
     if (null == cols) {
       return null;
@@ -720,6 +724,12 @@ public class MetaStoreUtils {
             "timestamp");
     typeToThriftTypeMap.put(
         org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME, "decimal");
+    typeToThriftTypeMap.put(
+        org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME,
+        org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME);
+    typeToThriftTypeMap.put(
+        org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME,
+        org.apache.hadoop.hive.serde.serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME);
   }
 
   static Set<String> hiveThriftTypeMap; //for validation

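The isValidTypeChar() change matters because validateColumnType() splits the type string into alternating runs of type characters and separators and checks each run against the known type names; treating '_' as a type character keeps the new interval type names in one piece. A self-contained tokenizer sketch of the same run-splitting idea (illustrative only, not the actual metastore code):

    import java.util.ArrayList;
    import java.util.List;

    public class TypeTokenizer {
      // True for characters that may appear inside a type token.
      static boolean isTypeChar(char c) {
        return Character.isLetterOrDigit(c) || c == '_';
      }

      // Splits "decimal(10,2)" into ["decimal", "(", "10", ",", "2", ")"], and keeps
      // "interval_year_month" as a single token thanks to the '_' rule.
      static List<String> tokenize(String type) {
        List<String> tokens = new ArrayList<String>();
        int last = 0;
        boolean lastTypeChar = isTypeChar(type.charAt(0));
        for (int i = 1; i <= type.length(); i++) {
          boolean typeChar = i < type.length() && isTypeChar(type.charAt(i));
          if (i == type.length() || typeChar != lastTypeChar) {
            tokens.add(type.substring(last, i));
            last = i;
            lastTypeChar = typeChar;
          }
        }
        return tokens;
      }

      public static void main(String[] args) {
        System.out.println(tokenize("interval_year_month")); // [interval_year_month]
        System.out.println(tokenize("decimal(10,2)"));       // [decimal, (, 10, ,, 2, )]
      }
    }

Without the underscore rule, "interval_year_month" would split into five tokens and fail the lookup against the known Thrift type names.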
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Apr 14 23:36:02 2015
@@ -1817,6 +1817,7 @@ public class ObjectStore implements RawS
     }
     for (MPartition mp : src) {
       dest.add(convertToPart(mp));
+      Deadline.checkTimeout();
     }
     return dest;
   }
@@ -1826,6 +1827,7 @@ public class ObjectStore implements RawS
     List<Partition> parts = new ArrayList<Partition>(mparts.size());
     for (MPartition mp : mparts) {
       parts.add(convertToPart(dbName, tblName, mp));
+      Deadline.checkTimeout();
     }
     return parts;
   }
@@ -3121,10 +3123,6 @@ public class ObjectStore implements RawS
     MTable origTable = mIndex.getOrigTable();
     MTable indexTable = mIndex.getIndexTable();
 
-    String[] qualified = MetaStoreUtils.getQualifiedName(
-        origTable.getDatabase().getName(), indexTable.getTableName());
-    String indexTableName = qualified[0] + "." + qualified[1];
-
     return new Index(
     mIndex.getIndexName(),
     mIndex.getIndexHandlerClass(),
@@ -3132,7 +3130,7 @@ public class ObjectStore implements RawS
     origTable.getTableName(),
     mIndex.getCreateTime(),
     mIndex.getLastAccessTime(),
-    indexTableName,
+    indexTable.getTableName(),
     convertToStorageDescriptor(mIndex.getSd()),
     mIndex.getParameters(),
     mIndex.getDeferredRebuild());
@@ -6053,6 +6051,7 @@ public class ObjectStore implements RawS
             desc.setLastAnalyzed(mStat.getLastAnalyzed());
           }
           statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat));
+          Deadline.checkTimeout();
         }
         return new ColumnStatistics(desc, statObjs);
       }
@@ -6101,6 +6100,7 @@ public class ObjectStore implements RawS
           }
           curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj));
           lastPartName = partName;
+          Deadline.checkTimeout();
         }
         return result;
       }
@@ -6111,12 +6111,13 @@ public class ObjectStore implements RawS
   @Override
   public AggrStats get_aggr_stats_for(String dbName, String tblName,
       final List<String> partNames, final List<String> colNames) throws MetaException, NoSuchObjectException {
+    final boolean  useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(), HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
     return new GetHelper<AggrStats>(dbName, tblName, true, false) {
       @Override
       protected AggrStats getSqlResult(GetHelper<AggrStats> ctx)
           throws MetaException {
         return directSql.aggrColStatsForPartitions(dbName, tblName, partNames,
-            colNames);
+            colNames, useDensityFunctionForNDVEstimation);
       }
       @Override
       protected AggrStats getJdoResult(GetHelper<AggrStats> ctx)

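The Deadline.checkTimeout() calls added to these conversion loops follow a cooperative-timeout pattern: long-running metastore loops periodically check an elapsed-time budget rather than relying on an external interrupt. A generic sketch of the pattern (this is not the Hive Deadline class; the names and behavior here are assumed for illustration):

    // Cooperative timeout for long loops: the caller sets a budget up front and the
    // loop body calls check() each iteration, failing fast once the budget is spent.
    final class LoopDeadline {
      private final long deadlineNanos;

      LoopDeadline(long timeoutMillis) {
        this.deadlineNanos = System.nanoTime() + timeoutMillis * 1_000_000L;
      }

      void check() {
        if (System.nanoTime() > deadlineNanos) {
          throw new RuntimeException("operation exceeded its time budget");
        }
      }
    }

    // Usage mirroring the shape of the patched loops:
    //   LoopDeadline deadline = new LoopDeadline(60_000);
    //   for (MPartition mp : src) {
    //     dest.add(convertToPart(mp));
    //     deadline.check();
    //   }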
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java Tue Apr 14 23:36:02 2015
@@ -24,13 +24,13 @@ import java.lang.reflect.InvocationTarge
 import java.lang.reflect.Method;
 import java.lang.reflect.Proxy;
 import java.lang.reflect.UndeclaredThrowableException;
+import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.thrift.TApplicationException;
 import org.apache.thrift.TException;
@@ -51,14 +51,17 @@ public class RetryingMetaStoreClient imp
   private final IMetaStoreClient base;
   private final int retryLimit;
   private final long retryDelaySeconds;
+  private final Map<String, Long> metaCallTimeMap;
+
 
 
 
   protected RetryingMetaStoreClient(HiveConf hiveConf, HiveMetaHookLoader hookLoader,
-      Class<? extends IMetaStoreClient> msClientClass) throws MetaException {
+      Map<String, Long> metaCallTimeMap, Class<? extends IMetaStoreClient> msClientClass) throws MetaException {
     this.retryLimit = hiveConf.getIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES);
     this.retryDelaySeconds = hiveConf.getTimeVar(
         HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY, TimeUnit.SECONDS);
+    this.metaCallTimeMap = metaCallTimeMap;
 
     reloginExpiringKeytabUser();
     this.base = MetaStoreUtils.newInstance(msClientClass, new Class[] {
@@ -67,14 +70,20 @@ public class RetryingMetaStoreClient imp
 
   public static IMetaStoreClient getProxy(HiveConf hiveConf, HiveMetaHookLoader hookLoader,
       String mscClassName) throws MetaException {
+    return getProxy(hiveConf, hookLoader, null, mscClassName);
+  }
+
+  public static IMetaStoreClient getProxy(HiveConf hiveConf, HiveMetaHookLoader hookLoader,
+      Map<String, Long> metaCallTimeMap, String mscClassName) throws MetaException {
 
-    Class<? extends IMetaStoreClient> baseClass = (Class<? extends IMetaStoreClient>)
-        MetaStoreUtils.getClass(mscClassName);
+    Class<? extends IMetaStoreClient> baseClass = (Class<? extends IMetaStoreClient>) MetaStoreUtils
+        .getClass(mscClassName);
 
-    RetryingMetaStoreClient handler = new RetryingMetaStoreClient(hiveConf, hookLoader, baseClass);
+    RetryingMetaStoreClient handler = new RetryingMetaStoreClient(hiveConf, hookLoader,
+        metaCallTimeMap, baseClass);
 
-    return (IMetaStoreClient) Proxy.newProxyInstance(RetryingMetaStoreClient.class.getClassLoader(),
-        baseClass.getInterfaces(), handler);
+    return (IMetaStoreClient) Proxy.newProxyInstance(
+        RetryingMetaStoreClient.class.getClassLoader(), baseClass.getInterfaces(), handler);
   }
 
   @Override
@@ -88,7 +97,15 @@ public class RetryingMetaStoreClient imp
         if(retriesMade > 0){
           base.reconnect();
         }
-        ret = method.invoke(base, args);
+        if (metaCallTimeMap == null) {
+          ret = method.invoke(base, args);
+        } else {
+          // need to capture the timing
+          long startTime = System.currentTimeMillis();
+          ret = method.invoke(base, args);
+          long timeTaken = System.currentTimeMillis() - startTime;
+          addMethodTime(method, timeTaken);
+        }
         break;
       } catch (UndeclaredThrowableException e) {
         throw e.getCause();
@@ -116,6 +133,30 @@ public class RetryingMetaStoreClient imp
     return ret;
   }
 
+  private void addMethodTime(Method method, long timeTaken) {
+    String methodStr = getMethodString(method);
+    Long curTime = metaCallTimeMap.get(methodStr);
+    if (curTime != null) {
+      timeTaken += curTime;
+    }
+    metaCallTimeMap.put(methodStr, timeTaken);
+  }
+
+  /**
+   * @param method
+   * @return String representation with arg types, e.g. getDatabase_(String, )
+   */
+  private String getMethodString(Method method) {
+    StringBuilder methodSb = new StringBuilder(method.getName());
+    methodSb.append("_(");
+    for (Class<?> paramClass : method.getParameterTypes()) {
+      methodSb.append(paramClass.getSimpleName());
+      methodSb.append(", ");
+    }
+    methodSb.append(")");
+    return methodSb.toString();
+  }
+
   /**
    * Relogin if login user is logged in using keytab
    * Relogin is actually done by ugi code only if sufficient time has passed

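The timing hook added to invoke() is a standard dynamic-proxy idiom: intercept every interface call, measure it, and accumulate per-method totals in a caller-supplied map. A self-contained sketch of the same idiom, independent of RetryingMetaStoreClient (class and variable names are illustrative):

    import java.lang.reflect.InvocationHandler;
    import java.lang.reflect.Method;
    import java.lang.reflect.Proxy;
    import java.util.Map;

    // Wraps an interface implementation and records cumulative call time per method.
    final class TimingProxy implements InvocationHandler {
      private final Object target;
      private final Map<String, Long> callTimeMap;

      private TimingProxy(Object target, Map<String, Long> callTimeMap) {
        this.target = target;
        this.callTimeMap = callTimeMap;
      }

      @SuppressWarnings("unchecked")
      static <T> T wrap(T target, Class<T> iface, Map<String, Long> callTimeMap) {
        return (T) Proxy.newProxyInstance(iface.getClassLoader(),
            new Class<?>[] { iface }, new TimingProxy(target, callTimeMap));
      }

      @Override
      public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
        long start = System.currentTimeMillis();
        try {
          return method.invoke(target, args);
        } finally {
          long taken = System.currentTimeMillis() - start;
          Long prev = callTimeMap.get(method.getName());
          callTimeMap.put(method.getName(), prev == null ? taken : prev + taken);
        }
      }
    }

    // Map<String, Long> times = new java.util.concurrent.ConcurrentHashMap<String, Long>();
    // SomeService svc = TimingProxy.wrap(realService, SomeService.class, times);
    // ... use svc as usual; times accumulates total millis per method name ...

The real client additionally handles reconnects and retries around the wrapped call; the sketch shows only the timing part.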
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java?rev=1673583&r1=1673582&r2=1673583&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java Tue Apr 14 23:36:02 2015
@@ -476,6 +476,133 @@ public class StatObjectConverter {
     }
   }
 
+  public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
+      Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
+      Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses,
+      Object avgLong, Object avgDouble, Object avgDecimal, Object sumDist,
+      boolean useDensityFunctionForNDVEstimation) throws MetaException {
+    colType = colType.toLowerCase();
+    if (colType.equals("boolean")) {
+      BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
+      boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses));
+      boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues));
+      boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
+      data.setBooleanStats(boolStats);
+    } else if (colType.equals("string") || colType.startsWith("varchar")
+        || colType.startsWith("char")) {
+      StringColumnStatsData stringStats = new StringColumnStatsData();
+      stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
+      stringStats.setAvgColLen((Double) avglen);
+      stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
+      stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+      data.setStringStats(stringStats);
+    } else if (colType.equals("binary")) {
+      BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
+      binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
+      binaryStats.setAvgColLen((Double) avglen);
+      binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
+      data.setBinaryStats(binaryStats);
+    } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint")
+        || colType.equals("tinyint") || colType.equals("timestamp")) {
+      LongColumnStatsData longStats = new LongColumnStatsData();
+      longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
+      if (lhigh != null) {
+        longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh));
+      }
+      if (llow != null) {
+        longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow));
+      }
+      long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
+      long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
+      if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null
+          && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) {
+        // We have an estimation, a lower bound and an upper bound; use the
+        // estimation only if it falls between the two bounds.
+        long estimation = MetaStoreDirectSql
+            .extractSqlLong((MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql
+                .extractSqlLong(llow)) / MetaStoreDirectSql.extractSqlDouble(avgLong));
+        if (estimation < lowerBound) {
+          longStats.setNumDVs(lowerBound);
+        } else if (estimation > higherBound) {
+          longStats.setNumDVs(higherBound);
+        } else {
+          longStats.setNumDVs(estimation);
+        }
+      } else {
+        longStats.setNumDVs(lowerBound);
+      }
+      data.setLongStats(longStats);
+    } else if (colType.equals("double") || colType.equals("float")) {
+      DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
+      doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
+      if (dhigh != null) {
+        doubleStats.setHighValue((Double) dhigh);
+      }
+      if (dlow != null) {
+        doubleStats.setLowValue((Double) dlow);
+      }
+      long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
+      long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
+      if (useDensityFunctionForNDVEstimation && dhigh != null && dlow != null && avgDouble != null
+          && MetaStoreDirectSql.extractSqlDouble(avgDouble) != 0.0) {
+        long estimation = MetaStoreDirectSql
+            .extractSqlLong((MetaStoreDirectSql.extractSqlLong(dhigh) - MetaStoreDirectSql
+                .extractSqlLong(dlow)) / MetaStoreDirectSql.extractSqlDouble(avgDouble));
+        if (estimation < lowerBound) {
+          doubleStats.setNumDVs(lowerBound);
+        } else if (estimation > higherBound) {
+          doubleStats.setNumDVs(higherBound);
+        } else {
+          doubleStats.setNumDVs(estimation);
+        }
+      } else {
+        doubleStats.setNumDVs(lowerBound);
+      }
+      data.setDoubleStats(doubleStats);
+    } else if (colType.startsWith("decimal")) {
+      DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
+      decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
+      Decimal low = null;
+      Decimal high = null;
+      BigDecimal blow = null;
+      BigDecimal bhigh = null;
+      if (dechigh instanceof BigDecimal) {
+        bhigh = (BigDecimal) dechigh;
+        high = new Decimal(ByteBuffer.wrap(bhigh.unscaledValue().toByteArray()),
+            (short) bhigh.scale());
+      } else if (dechigh instanceof String) {
+        bhigh = new BigDecimal((String) dechigh);
+        high = createThriftDecimal((String) dechigh);
+      }
+      decimalStats.setHighValue(high);
+      if (declow instanceof BigDecimal) {
+        blow = (BigDecimal) declow;
+        low = new Decimal(ByteBuffer.wrap(blow.unscaledValue().toByteArray()), (short) blow.scale());
+      } else if (declow instanceof String) {
+        blow = new BigDecimal((String) declow);
+        low = createThriftDecimal((String) declow);
+      }
+      decimalStats.setLowValue(low);
+      long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
+      long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
+      if (useDensityFunctionForNDVEstimation && dechigh != null && declow != null && avgDecimal != null
+          && MetaStoreDirectSql.extractSqlDouble(avgDecimal) != 0.0) {
+        long estimation = MetaStoreDirectSql.extractSqlLong(MetaStoreDirectSql.extractSqlLong(bhigh
+            .subtract(blow).floatValue() / MetaStoreDirectSql.extractSqlDouble(avgDecimal)));
+        if (estimation < lowerBound) {
+          decimalStats.setNumDVs(lowerBound);
+        } else if (estimation > higherBound) {
+          decimalStats.setNumDVs(higherBound);
+        } else {
+          decimalStats.setNumDVs(estimation);
+        }
+      } else {
+        decimalStats.setNumDVs(lowerBound);
+      }
+      data.setDecimalStats(decimalStats);
+    }
+  }
+
   private static Decimal createThriftDecimal(String s) {
     BigDecimal d = new BigDecimal(s);
     return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale());
@@ -484,4 +611,5 @@ public class StatObjectConverter {
   private static String createJdoDecimalString(Decimal d) {
     return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString();
   }
+
 }

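The decimal branch above stores high/low values the same way createThriftDecimal does: as an unscaled two's-complement byte array plus a scale. A standalone round-trip of that encoding in plain Java (no Thrift classes involved):

    import java.math.BigDecimal;
    import java.math.BigInteger;

    public class DecimalRoundTrip {
      public static void main(String[] args) {
        BigDecimal original = new BigDecimal("123.45");

        // Encode: the pieces a Thrift Decimal carries (cf. createThriftDecimal).
        byte[] unscaled = original.unscaledValue().toByteArray();
        short scale = (short) original.scale();

        // Decode: rebuild the value from those pieces (cf. createJdoDecimalString).
        BigDecimal decoded = new BigDecimal(new BigInteger(unscaled), scale);

        System.out.println(decoded);                  // 123.45
        System.out.println(original.equals(decoded)); // true
      }
    }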

