hive-commits mailing list archives

From hashut...@apache.org
Subject svn commit: r1526106 [8/8] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ metastore/if/ metastore/src/gen/thrift/gen-cpp/ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ metastore/src/gen/thrift/gen-php/metas...
Date Wed, 25 Sep 2013 04:57:48 GMT
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1526106&r1=1526105&r2=1526106&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Wed Sep 25 04:57:46 2013
@@ -32,6 +32,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -241,6 +242,19 @@ public class PartitionPruner implements 
     return expr;
   }
 
+  /**
+   * @param expr Expression.
+   * @return True iff expr contains any non-native user-defined functions.
+   */
+  static private boolean hasUserFunctions(ExprNodeDesc expr) {
+    if (!(expr instanceof ExprNodeGenericFuncDesc)) return false;
+    if (!FunctionRegistry.isNativeFuncExpr((ExprNodeGenericFuncDesc)expr)) return true;
+    for (ExprNodeDesc child : expr.getChildren()) {
+      if (hasUserFunctions(child)) return true;
+    }
+    return false;
+  }
+
   private static PrunedPartitionList getPartitionsFromServer(Table tab,
       ExprNodeDesc prunerExpr, HiveConf conf, String alias) throws HiveException {
     try {
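
[Editor's note: the new hasUserFunctions helper above is a plain recursive walk over the expression tree. The following is a minimal, self-contained sketch of the same shape; the Node class and the main harness are hypothetical stand-ins for Hive's ExprNodeGenericFuncDesc and FunctionRegistry.isNativeFuncExpr, not Hive APIs.]

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

class ExprWalkSketch {
  // Hypothetical stand-in for ExprNodeDesc/ExprNodeGenericFuncDesc.
  static class Node {
    final boolean isFunc;    // is this node a function call at all?
    final boolean isNative;  // stand-in for FunctionRegistry.isNativeFuncExpr(...)
    final List<Node> children;
    Node(boolean isFunc, boolean isNative, List<Node> children) {
      this.isFunc = isFunc; this.isNative = isNative; this.children = children;
    }
  }

  // Same shape as the new PartitionPruner.hasUserFunctions: short-circuit true
  // on the first non-native function, otherwise recurse into the children.
  static boolean hasUserFunctions(Node expr) {
    if (!expr.isFunc) return false;
    if (!expr.isNative) return true;
    for (Node child : expr.children) {
      if (hasUserFunctions(child)) return true;
    }
    return false;
  }

  public static void main(String[] args) {
    Node col = new Node(false, false, Collections.<Node>emptyList());
    Node udf = new Node(true, false, Collections.singletonList(col)); // user UDF
    Node and = new Node(true, true, Arrays.asList(col, udf));         // native AND
    System.out.println(hasUserFunctions(and)); // true: the UDF forces client-side pruning
  }
}
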
@@ -258,36 +272,55 @@ public class PartitionPruner implements 
       }
 
       if (prunerExpr == null) {
-        // This can happen when hive.mapred.mode=nonstrict and there is no predicates at all.
+        // Non-strict mode, and there are no predicates at all - get everything.
         return new PrunedPartitionList(tab, Hive.get().getAllPartitionsForPruner(tab), false);
       }
 
-      // Remove virtual columns. See javadoc for details.
+      // Replace virtual columns with nulls. See javadoc for details.
       prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab));
-      // Remove all unknown parts e.g. non-partition columns. See javadoc for details.
+      // Remove all parts that are not partition columns. See javadoc for details.
       ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
       String oldFilter = prunerExpr.getExprString();
       if (compactExpr == null) {
-        // This could happen when hive.mapred.mode=nonstrict and all the predicates
-        // are on non-partition columns.
+        // Non-strict mode, and all the predicates are on non-partition columns - get everything.
         LOG.debug("Filter " + oldFilter + " was null after compacting");
         return new PrunedPartitionList(tab, Hive.get().getAllPartitionsForPruner(tab), true);
       }
 
-      Set<Partition> partitions = new LinkedHashSet<Partition>();
+      LOG.debug("Filter w/ compacting: " + compactExpr.getExprString()
+        + "; filter w/o compacting: " + oldFilter);
+
+      // Finally, check the filter for non-built-in UDFs. If these are present, we cannot
+      // do filtering on the server, and have to fall back to client path.
+      boolean doEvalClientSide = hasUserFunctions(compactExpr);
+
+      // Now filter.
+      List<Partition> partitions = new ArrayList<Partition>();
       boolean hasUnknownPartitions = false;
-      String message = Utilities.checkJDOPushDown(tab, compactExpr, null);
-      if (message != null) {
-        LOG.info(ErrorMsg.INVALID_JDO_FILTER_EXPRESSION.getMsg("by condition '"
-            + message + "'"));
-        hasUnknownPartitions = pruneBySequentialScan(tab, partitions, prunerExpr, conf);
-      } else {
-        String filter = compactExpr.getExprString();
-        LOG.debug("Filter w/ compacting: " + filter +"; filter w/o compacting: " + oldFilter);
-        hasUnknownPartitions = !filter.equals(oldFilter);
-        partitions.addAll(Hive.get().getPartitionsByFilter(tab, filter));
+      PerfLogger perfLogger = PerfLogger.getPerfLogger();
+      if (!doEvalClientSide) {
+        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
+        try {
+          hasUnknownPartitions = Hive.get().getPartitionsByExpr(
+              tab, compactExpr, conf, partitions);
+        } catch (IMetaStoreClient.IncompatibleMetastoreException ime) {
+          // TODO: backward compat for Hive <= 0.12. Can be removed later.
+          LOG.warn("Metastore doesn't support getPartitionsByExpr", ime);
+          doEvalClientSide = true;
+        } finally {
+          perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING);
+        }
       }
-      return new PrunedPartitionList(tab, partitions, hasUnknownPartitions);
+      if (doEvalClientSide) {
+        // Either we have user functions, or the metastore is an old version - filter names locally.
+        hasUnknownPartitions = pruneBySequentialScan(tab, partitions, compactExpr, conf);
+      }
+      // The partitions are "unknown" if the call says so due to the expression
+      // evaluator returning null for a partition, or if we sent a partial expression to
+      // metastore and so some partitions may have no data based on other filters.
+      boolean isPruningByExactFilter = oldFilter.equals(compactExpr.getExprString());
+      return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(partitions),
+          hasUnknownPartitions || !isPruningByExactFilter);
     } catch (HiveException e) {
       throw e;
     } catch (Exception e) {
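
[Editor's note: the control flow in this hunk boils down to: try the metastore-side pushdown unless the filter contains user functions, and fall back to client-side pruning when the metastore is too old to support getPartitionsByExpr (Hive <= 0.12, per the TODO above). A minimal sketch of that strategy follows; the Server and Client interfaces are hypothetical stand-ins for Hive.get().getPartitionsByExpr and pruneBySequentialScan.]

import java.util.ArrayList;
import java.util.List;

class PruningStrategySketch {
  static class IncompatibleMetastoreException extends Exception {}

  // Hypothetical stand-ins: both fill 'out' with matching partitions and
  // return the "has unknown partitions" flag.
  interface Server { boolean fetchByExpr(List<String> out) throws IncompatibleMetastoreException; }
  interface Client { boolean pruneLocally(List<String> out); }

  static boolean prune(Server server, Client client, boolean exprHasUserFunctions,
      List<String> partitions) {
    boolean evalClientSide = exprHasUserFunctions; // UDFs can't be pushed to the server
    boolean hasUnknown = false;
    if (!evalClientSide) {
      try {
        hasUnknown = server.fetchByExpr(partitions);
      } catch (IncompatibleMetastoreException ime) {
        evalClientSide = true; // old metastore: retry on the client
      }
    }
    if (evalClientSide) {
      hasUnknown = client.pruneLocally(partitions);
    }
    return hasUnknown;
  }

  public static void main(String[] args) {
    List<String> parts = new ArrayList<String>();
    Server server = out -> { out.add("ds=2013-09-25"); return false; }; // pushdown succeeds
    Client client = out -> true;                                        // unused in this run
    System.out.println(prune(server, client, false, parts) + " " + parts);
  }
}
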
@@ -297,15 +330,15 @@ public class PartitionPruner implements 
 
   /**
    * Pruning partition by getting the partition names first and pruning using Hive expression
-   * evaluator.
+   * evaluator on client.
    * @param tab the table containing the partitions.
    * @param partitions the resulting partitions.
    * @param prunerExpr the SQL predicate that involves partition columns.
    * @param conf Hive Configuration object, can not be NULL.
    * @return true iff the partition pruning expression contains non-partition columns.
    */
-  static private boolean pruneBySequentialScan(Table tab, Set<Partition> partitions,
-      ExprNodeDesc prunerExpr, HiveConf conf) throws Exception {
+  static private boolean pruneBySequentialScan(Table tab, List<Partition> partitions,
+      ExprNodeDesc prunerExpr, HiveConf conf) throws HiveException, MetaException {
     PerfLogger perfLogger = PerfLogger.getPerfLogger();
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRUNE_LISTING);
 

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java?rev=1526106&r1=1526105&r2=1526106&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java Wed Sep 25 04:57:46 2013
@@ -26,6 +26,7 @@ import java.lang.reflect.Field;
 import java.lang.reflect.Modifier;
 import java.util.Collection;
 import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -37,6 +38,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.thrift.TException;
 
 class VerifyingObjectStore extends ObjectStore {
   private static final Log LOG = LogFactory.getLog(VerifyingObjectStore.class);
@@ -53,7 +55,7 @@ class VerifyingObjectStore extends Objec
         dbName, tblName, filter, maxParts, true, false);
     List<Partition> ormResults = getPartitionsByFilterInternal(
         dbName, tblName, filter, maxParts, false, true);
-    compareParts(sqlResults, ormResults);
+    verifyParts(sqlResults, ormResults);
     return sqlResults;
   }
 
@@ -64,21 +66,39 @@ class VerifyingObjectStore extends Objec
         dbName, tblName, partNames, true, false);
     List<Partition> ormResults = getPartitionsByNamesInternal(
         dbName, tblName, partNames, false, true);
-    compareParts(sqlResults, ormResults);
+    verifyParts(sqlResults, ormResults);
     return sqlResults;
   }
 
   @Override
+  public boolean getPartitionsByExpr(String dbName, String tblName, byte[] expr,
+      String defaultPartitionName, short maxParts, Set<Partition> result) throws TException {
+    Set<Partition> ormParts = new LinkedHashSet<Partition>();
+    boolean sqlResult = getPartitionsByExprInternal(
+        dbName, tblName, expr, defaultPartitionName, maxParts, result, true, false);
+    boolean ormResult = getPartitionsByExprInternal(
+        dbName, tblName, expr, defaultPartitionName, maxParts, ormParts, false, true);
+    if (sqlResult != ormResult) {
+      String msg = "The unknown flag is different - SQL " + sqlResult + ", ORM " + ormResult;
+      LOG.error(msg);
+      throw new MetaException(msg);
+    }
+    verifyParts(result, ormParts);
+    return sqlResult;
+  }
+
+  @Override
   public List<Partition> getPartitions(
       String dbName, String tableName, int maxParts) throws MetaException {
     List<Partition> sqlResults = getPartitionsInternal(dbName, tableName, maxParts, true, false);
     List<Partition> ormResults = getPartitionsInternal(dbName, tableName, maxParts, false, true);
-    compareParts(sqlResults, ormResults);
+    verifyParts(sqlResults, ormResults);
     return sqlResults;
   };
 
-  private void compareParts(List<Partition> sqlResults, List<Partition> ormResults)
+  private void verifyParts(Collection<Partition> sqlResults, Collection<Partition> ormResults)
       throws MetaException {
+    final int MAX_DIFFS = 5;
     if (sqlResults.size() != ormResults.size()) {
       String msg = "Lists are not the same size: SQL " + sqlResults.size()
           + ", ORM " + ormResults.size();
@@ -86,9 +106,12 @@ class VerifyingObjectStore extends Objec
       throw new MetaException(msg);
     }
 
+    Iterator<Partition> sqlIter = sqlResults.iterator(), ormIter = ormResults.iterator();
     StringBuilder errorStr = new StringBuilder();
+    int errors = 0;
     for (int partIx = 0; partIx < sqlResults.size(); ++partIx) {
-      Partition p1 = sqlResults.get(partIx), p2 = ormResults.get(partIx);
+      assert sqlIter.hasNext() && ormIter.hasNext();
+      Partition p1 = sqlIter.next(), p2 = ormIter.next();
       if (EqualsBuilder.reflectionEquals(p1, p2)) continue;
       errorStr.append("Results are different at list index " + partIx + ": \n");
       try {
@@ -102,6 +125,10 @@ class VerifyingObjectStore extends Objec
         LOG.error(msg, t);
         break;
       }
+      if (++errors == MAX_DIFFS) {
+        errorStr.append("\n\nToo many diffs, giving up (lists might be sorted differently)");
+        break;
+      }
     }
     if (errorStr.length() > 0) {
       LOG.error("Different results: \n" + errorStr.toString());


