hadoop-hive-commits mailing list archives

From: heyongqi...@apache.org
Subject: svn commit: r897322 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
Date: Fri, 08 Jan 2010 21:10:44 GMT
Author: heyongqiang
Date: Fri Jan  8 21:10:44 2010
New Revision: 897322

URL: http://svn.apache.org/viewvc?rev=897322&view=rev
Log:
HIVE-1038: mapjoin dies if the join prunes all the columns
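
In short: when a map join prunes every column from one of its inputs, getFieldSchemasFromColumnInfo can be handed a null (or empty) Vector<ColumnInfo>, and the unguarded "new ArrayList<FieldSchema>(cols.size())" call throws a NullPointerException while the plan is generated. The PlanUtils.java hunk below adds an early return of an empty schema list. A minimal standalone sketch of that guard follows; ColumnInfo and the schema element type here are simplified stand-ins, not the real Hive classes:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Vector;

    public class EmptySchemaGuard {

        // Simplified stand-in for Hive's ColumnInfo; only the internal name matters here.
        static class ColumnInfo {
            final String internalName;
            ColumnInfo(String internalName) { this.internalName = internalName; }
        }

        // Mirrors the shape of the patched getFieldSchemasFromColumnInfo. Without
        // the guard on the first two lines, cols.size() throws a NullPointerException
        // when the join has pruned every column and cols arrives as null.
        static List<String> getFieldSchemas(Vector<ColumnInfo> cols, String fieldPrefix) {
            if ((cols == null) || (cols.size() == 0))
                return new ArrayList<String>();   // the fix: empty schema instead of an NPE

            List<String> schemas = new ArrayList<String>(cols.size());
            for (int i = 0; i < cols.size(); i++) {
                String name = cols.get(i).internalName;
                if (name.equals(Integer.valueOf(i).toString())) {
                    name = fieldPrefix + name;    // prefix purely positional names, as in the real method
                }
                schemas.add(name);
            }
            return schemas;
        }

        public static void main(String[] args) {
            System.out.println(getFieldSchemas(null, "f"));                     // prints []
            System.out.println(getFieldSchemas(new Vector<ColumnInfo>(), "f")); // prints []
        }
    }

Returning an empty list rather than failing lets plan generation continue with a zero-column schema for the fully pruned side of the join.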

Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=897322&r1=897321&r2=897322&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Jan  8 21:10:44 2010
@@ -424,6 +424,9 @@
     HIVE-978 Hive jars should follow Hadoop naming and include version
     (Chad Metcalf and Zheng Shao via namit)
 
+    HIVE-1038 mapjoin dies if the join prunes all the columns 
+    (namit via He Yongqiang)
+
 Release 0.4.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java?rev=897322&r1=897321&r2=897322&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java Fri Jan  8 21:10:44 2010
@@ -44,14 +44,14 @@
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 
 public class PlanUtils {
-  
+
   protected final static Log LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.plan.PlanUtils");
-  
+
   public static enum ExpressionTypes {FIELD, JEXL};
 
   @SuppressWarnings("nls")
   public static mapredWork getMapRedWork() {
-    return new mapredWork("", 
+    return new mapredWork("",
                           new LinkedHashMap<String, ArrayList<String>> (),
                           new LinkedHashMap<String, partitionDesc> (),
                           new LinkedHashMap<String, Operator<? extends Serializable>> (),
@@ -60,8 +60,8 @@
                           null,
                           Integer.valueOf (1), null);
   }
-  
-  /** 
+
+  /**
    * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode
    * and column names (comma separated string).
    */
@@ -69,7 +69,7 @@
     return getDefaultTableDesc(separatorCode, columns, false);
   }
 
-  /** 
+  /**
    * Generate the table descriptor of given serde with the separatorCode
    * and column names (comma separated string).
    */
@@ -77,7 +77,7 @@
     return getTableDesc(serdeClass, separatorCode, columns, false);
   }
 
-  /** 
+  /**
    * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode
    * and column names (comma separated string), and whether the last column should take
    * the rest of the line.
@@ -87,18 +87,18 @@
     return getDefaultTableDesc(separatorCode, columns, null, lastColumnTakesRestOfTheLine);
   }
 
-  /** 
+  /**
    * Generate the table descriptor of the serde specified with the separatorCode
    * and column names (comma separated string), and whether the last column should take
    * the rest of the line.
    */
-  public static tableDesc getTableDesc(Class<? extends Deserializer> serdeClass, 
+  public static tableDesc getTableDesc(Class<? extends Deserializer> serdeClass,
                                        String separatorCode, String columns,
                                        boolean lastColumnTakesRestOfTheLine) {
     return getTableDesc(serdeClass, separatorCode, columns, null, lastColumnTakesRestOfTheLine);
   }
 
-  /** 
+  /**
    * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode
    * and column names (comma separated string), and whether the last column should take
    * the rest of the line.
@@ -122,13 +122,13 @@
     Properties properties = Utilities.makeProperties(
       Constants.SERIALIZATION_FORMAT, separatorCode,
       Constants.LIST_COLUMNS, columns);
-    
+
     if ( ! separatorCode.equals(Integer.toString(Utilities.ctrlaCode)) )
       properties.setProperty(Constants.FIELD_DELIM, separatorCode);
-    
+
     if (columnTypes != null)
       properties.setProperty(Constants.LIST_COLUMN_TYPES, columnTypes);
-    
+
     if (lastColumnTakesRestOfTheLine) {
       properties.setProperty(
           Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST,
@@ -142,20 +142,20 @@
       properties.setProperty(
           Constants.SERIALIZATION_USE_JSON_OBJECTS,
           "true");
-      
-      
+
+
     return new tableDesc(
       serdeClass,
       TextInputFormat.class,
       IgnoreKeyTextOutputFormat.class,
-      properties);    
+      properties);
   }
-  
+
   /**
    * Generate a table descriptor from a createTableDesc.
    */
   public static tableDesc getTableDesc(createTableDesc crtTblDesc, String cols, String colTypes) {
-    
+
     Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
     String separatorCode                     = Integer.toString(Utilities.ctrlaCode);
     String columns                           = cols;
@@ -168,34 +168,34 @@
         Class c = Class.forName(crtTblDesc.getSerName());
         serdeClass = c;
       }
-    
+
       if ( crtTblDesc.getFieldDelim() != null )
         separatorCode = crtTblDesc.getFieldDelim();
-      
-      ret = getTableDesc(serdeClass, separatorCode, columns, columnTypes, 
+
+      ret = getTableDesc(serdeClass, separatorCode, columns, columnTypes,
                          lastColumnTakesRestOfTheLine, false);
-      
+
       // set other table properties
       Properties properties = ret.getProperties();
-      
+
       if ( crtTblDesc.getCollItemDelim() != null )
         properties.setProperty(Constants.COLLECTION_DELIM, crtTblDesc.getCollItemDelim());
-      
+
       if ( crtTblDesc.getMapKeyDelim() != null )
         properties.setProperty(Constants.MAPKEY_DELIM, crtTblDesc.getMapKeyDelim());
-      
+
       if ( crtTblDesc.getFieldEscape() != null )
         properties.setProperty(Constants.ESCAPE_CHAR, crtTblDesc.getFieldEscape());
-      
+
       if ( crtTblDesc.getLineDelim() != null )
         properties.setProperty(Constants.LINE_DELIM, crtTblDesc.getLineDelim());
-      
+
       // replace the default input & output file format with those found in crtTblDesc
       Class c1 = Class.forName(crtTblDesc.getInputFormat());
       Class c2 = Class.forName(crtTblDesc.getOutputFormat());
       Class<? extends InputFormat>      in_class  = c1;
       Class<? extends HiveOutputFormat> out_class = c2;
-    
+
       ret.setInputFileFormatClass(in_class);
       ret.setOutputFileFormatClass(out_class);
     } catch (ClassNotFoundException e) {
@@ -204,8 +204,8 @@
     }
     return ret;
   }
-  
-  /** 
+
+  /**
    * Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode.
    * MetaDataTypedColumnsetSerDe is used because LazySimpleSerDe does not support a table
    * with a single column "col" with type "array<string>".
@@ -216,10 +216,10 @@
         TextInputFormat.class,
         IgnoreKeyTextOutputFormat.class,
         Utilities.makeProperties(
-            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, separatorCode));    
+            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, separatorCode));
   }
 
-  /** 
+  /**
    * Generate the table descriptor for reduce key.
    */
   public static tableDesc getReduceKeyTableDesc(List<FieldSchema> fieldSchemas, String order) {
@@ -230,14 +230,14 @@
         Utilities.makeProperties(
             Constants.LIST_COLUMNS,
               MetaStoreUtils.getColumnNamesFromFieldSchema(fieldSchemas),
-            Constants.LIST_COLUMN_TYPES, 
+            Constants.LIST_COLUMN_TYPES,
               MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas),
-            Constants.SERIALIZATION_SORT_ORDER, 
+            Constants.SERIALIZATION_SORT_ORDER,
               order
         ));
   }
 
-  /** 
+  /**
    * Generate the table descriptor for Map-side join key.
    */
   public static tableDesc getMapJoinKeyTableDesc(List<FieldSchema> fieldSchemas) {
@@ -252,7 +252,7 @@
         ));
   }
 
-  /** 
+  /**
    * Generate the table descriptor for Map-side join key.
    */
   public static tableDesc getMapJoinValueTableDesc(List<FieldSchema> fieldSchemas) {
@@ -267,7 +267,7 @@
         ));
   }
 
-  /** 
+  /**
    * Generate the table descriptor for intermediate files.
    */
   public static tableDesc getIntermediateFileTableDesc(List<FieldSchema> fieldSchemas) {
@@ -281,11 +281,11 @@
             Constants.LIST_COLUMN_TYPES,
               MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas),
             Constants.ESCAPE_CHAR,
-              "\\"              
+              "\\"
         ));
   }
-  
-  /** 
+
+  /**
    * Generate the table descriptor for intermediate files.
    */
   public static tableDesc getReduceValueTableDesc(List<FieldSchema> fieldSchemas) {
@@ -299,11 +299,11 @@
             Constants.LIST_COLUMN_TYPES,
               MetaStoreUtils.getColumnTypesFromFieldSchema(fieldSchemas),
             Constants.ESCAPE_CHAR,
-              "\\"              
+              "\\"
         ));
   }
-  
-  /** 
+
+  /**
    * Convert the ColumnList to FieldSchema list.
    */
   public static List<FieldSchema> getFieldSchemasFromColumnList(List<exprNodeDesc> cols, List<String> outputColumnNames, int start,
@@ -314,11 +314,11 @@
     }
     return schemas;
   }
-  
-  /** 
+
+  /**
    * Convert the ColumnList to FieldSchema list.
    */
-  public static List<FieldSchema> getFieldSchemasFromColumnList(List<exprNodeDesc> cols, 
+  public static List<FieldSchema> getFieldSchemasFromColumnList(List<exprNodeDesc> cols,
       String fieldPrefix) {
     List<FieldSchema> schemas = new ArrayList<FieldSchema>(cols.size());
     for (int i=0; i<cols.size(); i++) {
@@ -326,30 +326,33 @@
     }
     return schemas;
   }
-  
-  /** 
+
+  /**
    * Convert the RowSchema to FieldSchema list.
    */
   public static List<FieldSchema> getFieldSchemasFromRowSchema(RowSchema row, String fieldPrefix) {
     Vector<ColumnInfo> c = row.getSignature();
     return getFieldSchemasFromColumnInfo(c, fieldPrefix);
   }
-  
-  /** 
+
+  /**
    * Convert the ColumnInfo to FieldSchema.
    */
   public static List<FieldSchema> getFieldSchemasFromColumnInfo(Vector<ColumnInfo> cols, String fieldPrefix) {
+    if ((cols == null) || (cols.size() == 0))
+      return new ArrayList<FieldSchema>();
+
     List<FieldSchema> schemas = new ArrayList<FieldSchema>(cols.size());
     for (int i=0; i<cols.size(); i++) {
       String name = cols.get(i).getInternalName();
       if (name.equals(Integer.valueOf(i).toString())) {
-        name = fieldPrefix + name; 
+        name = fieldPrefix + name;
       }
       schemas.add(MetaStoreUtils.getFieldSchemaFromTypeInfo(name, cols.get(i).getType()));
     }
     return schemas;
   }
-  
+
   public static List<FieldSchema> sortFieldSchemas(List<FieldSchema> schema) {
     Collections.sort(schema, new Comparator<FieldSchema>(){
 
@@ -357,11 +360,11 @@
       public int compare(FieldSchema o1, FieldSchema o2) {
         return o1.getName().compareTo(o2.getName());
       }
-      
+
     });
     return schema;
   }
-  
+
   /**
    * Create the reduce sink descriptor.
    * @param keyCols   The columns to be stored in the key
@@ -369,15 +372,15 @@
    * @param outputColumnNames The output columns names
    * @param tag       The tag for this reducesink
    * @param partitionCols The columns for partitioning.
-   * @param numReducers  The number of reducers, set to -1 for automatic inference 
+   * @param numReducers  The number of reducers, set to -1 for automatic inference
    *                     based on input data size.
    * @return The reduceSinkDesc object.
    */
-  public static reduceSinkDesc getReduceSinkDesc(ArrayList<exprNodeDesc> keyCols, 
+  public static reduceSinkDesc getReduceSinkDesc(ArrayList<exprNodeDesc> keyCols,
                                                  ArrayList<exprNodeDesc> valueCols,
                                                  List<String> outputColumnNames,
                                                  boolean includeKeyCols,
-                                                 int tag, 
+                                                 int tag,
                                                  ArrayList<exprNodeDesc> partitionCols,
                                                  String order,
                                                  int numReducers) {
@@ -402,7 +405,7 @@
           valueCols, outputColumnNames, 0, ""));
       outputValCols.addAll(outputColumnNames);
     }
-    return new reduceSinkDesc(keyCols, valueCols, outputKeyCols, outputValCols, tag, partitionCols, numReducers, 
+    return new reduceSinkDesc(keyCols, valueCols, outputKeyCols, outputValCols, tag, partitionCols, numReducers,
         keyTable,
        // Revert to DynamicSerDe: getBinaryTableDesc(getFieldSchemasFromColumnList(valueCols, "reducesinkvalue")));
         valueTable);
@@ -416,7 +419,7 @@
    * @param tag       The tag for this reducesink
   * @param numPartitionFields  The first numPartitionFields of keyCols will be partition columns.
    *                  If numPartitionFields=-1, then partition randomly.
-   * @param numReducers  The number of reducers, set to -1 for automatic inference 
+   * @param numReducers  The number of reducers, set to -1 for automatic inference
    *                     based on input data size.
    * @return The reduceSinkDesc object.
    */
@@ -438,7 +441,7 @@
       partitionCols = new ArrayList<exprNodeDesc>(1);
       partitionCols.add(TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("rand"));
     }
-    
+
     StringBuilder order = new StringBuilder();
     for (int i=0; i<keyCols.size(); i++) {
       order.append("+");
@@ -446,7 +449,7 @@
     return getReduceSinkDesc(keyCols, valueCols, outputColumnNames, includeKey, tag, partitionCols, order.toString(),
          numReducers);
   }
-  
+
 
 }
-  
+


