hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gunt...@apache.org
Subject svn commit: r1622374 - in /hive/branches/tez: ./ common/src/java/org/apache/hadoop/hive/conf/ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/ metastore/src/java/org/apache/hadoop/hive/metastore/ metastore/src/test/org/apache/h...
Date Wed, 03 Sep 2014 22:54:19 GMT
Author: gunther
Date: Wed Sep  3 22:54:18 2014
New Revision: 1622374

URL: http://svn.apache.org/r1622374
Log:
Merge latest trunk into branch. (Gunther Hagleitner)

Added:
    hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
      - copied unchanged from r1622373, hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
Modified:
    hive/branches/tez/   (props changed)
    hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
    hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
    hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
    hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
    hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
    hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
    hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
    hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java

Propchange: hive/branches/tez/
------------------------------------------------------------------------------
  Merged /hive/trunk:r1622081-1622373

Modified: hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Sep  3
22:54:18 2014
@@ -835,6 +835,10 @@ public class HiveConf extends Configurat
         "If the number of keys in a dictionary is greater than this fraction of the total
number of\n" +
         "non-null rows, turn off dictionary encoding.  Use 1 to always use dictionary encoding."),
     HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride", 10000, "Define
the default ORC index stride"),
+    HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK("hive.orc.row.index.stride.dictionary.check",
true,
+        "If enabled dictionary check will happen after first row index stride (default 10000
rows)\n" +
+        "else dictionary check will happen before writing first stripe. In both cases, the
decision\n" +
+        "to use dictionary or not will be retained thereafter."),
     HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024, "Define
the default ORC buffer size"),
     HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true, "Define the
default block padding"),
     HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f,

Modified: hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
(original)
+++ hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
Wed Sep  3 22:54:18 2014
@@ -199,7 +199,8 @@ public class HCatLoader extends HCatBase
     throws IOException {
     Table table = phutil.getTable(location,
       hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
-      PigHCatUtil.getHCatServerPrincipal(job));
+      PigHCatUtil.getHCatServerPrincipal(job),
+      job);   // Pass job to initialize metastore conf overrides
     List<FieldSchema> tablePartitionKeys = table.getPartitionKeys();
     String[] partitionKeys = new String[tablePartitionKeys.size()];
     for (int i = 0; i < tablePartitionKeys.size(); i++) {
@@ -215,7 +216,11 @@ public class HCatLoader extends HCatBase
 
     Table table = phutil.getTable(location,
       hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
-      PigHCatUtil.getHCatServerPrincipal(job));
+      PigHCatUtil.getHCatServerPrincipal(job),
+
+      // Pass job to initialize metastore conf overrides for embedded metastore case
+      // (hive.metastore.uris = "").
+      job);
     HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table);
     try {
       PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema);

Modified: hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
(original)
+++ hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
Wed Sep  3 22:54:18 2014
@@ -142,8 +142,16 @@ class PigHCatUtil {
   }
 
   private static HiveMetaStoreClient getHiveMetaClient(String serverUri,
-                             String serverKerberosPrincipal, Class<?> clazz) throws
Exception {
-    HiveConf hiveConf = new HiveConf(clazz);
+                             String serverKerberosPrincipal,
+                             Class<?> clazz,
+                             Job job) throws Exception {
+
+    // The job configuration is passed in so the configuration will be cloned
+    // from the pig job configuration. This is necessary for overriding
+    // metastore configuration arguments like the metastore jdbc connection string
+    // and password, in the case of an embedded metastore, which you get when
+    // hive.metastore.uris = "".
+    HiveConf hiveConf = new HiveConf(job.getConfiguration(), clazz);
 
     if (serverUri != null) {
       hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim());
@@ -178,7 +186,13 @@ class PigHCatUtil {
     return new HCatSchema(fcols);
   }
 
-  public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal)
throws IOException {
+  /*
+  * The job argument is passed so that configuration overrides can be used to initialize
+  * the metastore configuration in the special case of an embedded metastore
+  * (hive.metastore.uris = "").
+  */
+  public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal,
+      Job job) throws IOException {
     Pair<String, String> loc_server = new Pair<String, String>(location, hcatServerUri);
     Table hcatTable = hcatTableCache.get(loc_server);
     if (hcatTable != null) {
@@ -191,7 +205,7 @@ class PigHCatUtil {
     Table table = null;
     HiveMetaStoreClient client = null;
     try {
-      client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class);
+      client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class, job);
       table = HCatUtil.getTable(client, dbName, tableName);
     } catch (NoSuchObjectException nsoe) {
       throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE);
// prettier error messages to frontend

Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
(original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
Wed Sep  3 22:54:18 2014
@@ -3721,19 +3721,6 @@ public class HiveMetaStore extends Thrif
         endFunction("write_partition_column_statistics: ", ret != false, null, tableName);
       }
     } 
-    public boolean update_partition_column_statistics(
-        SetPartitionsStatsRequest request) throws NoSuchObjectException,
-        InvalidObjectException, MetaException, TException,
-        InvalidInputException {
-      boolean ret = false;
-      try {
-        ret = getMS().updatePartitionColumnStatistics(request);
-        return ret;
-      } finally {
-        endFunction("write_partition_column_statistics: ", ret != false, null,
-            null);
-      }
-    }
 
     @Override
     public boolean delete_partition_column_statistics(String dbName, String tableName,
@@ -5058,9 +5045,13 @@ public class HiveMetaStore extends Thrif
 
     @Override
     public boolean set_aggr_stats_for(SetPartitionsStatsRequest request)
-        throws NoSuchObjectException, InvalidObjectException, MetaException,
-        InvalidInputException, TException {
-      return update_partition_column_statistics(request);
+        throws NoSuchObjectException, InvalidObjectException, MetaException, InvalidInputException,
+        TException {
+      boolean ret = true;
+      for (ColumnStatistics colStats : request.getColStats()) {
+        ret = ret && update_partition_column_statistics(colStats);
+      }
+      return ret;
     }
   }
 

Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
(original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
Wed Sep  3 22:54:18 2014
@@ -5779,34 +5779,6 @@ public class ObjectStore implements RawS
     }
   }
 
-  @Override
-  public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request)
-      throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
{
-    boolean committed = false;
-    try {
-      openTransaction();
-      for (ColumnStatistics colStats : request.getColStats()) {
-        ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
-        statsDesc.setDbName(statsDesc.getDbName().toLowerCase());
-        statsDesc.setTableName(statsDesc.getTableName().toLowerCase());
-        List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
-        for (ColumnStatisticsObj statsObj : statsObjs) {
-          statsObj.setColName(statsObj.getColName().toLowerCase());
-          statsObj.setColType(statsObj.getColType().toLowerCase());
-          MPartitionColumnStatistics mStatsObj = StatObjectConverter
-              .convertToMPartitionColumnStatistics(null, statsDesc, statsObj);
-          pm.makePersistent(mStatsObj);
-        }
-      }
-      committed = commitTransaction();
-      return committed;
-    } finally {
-      if (!committed) {
-        rollbackTransaction();
-      }
-    }
-  }
-
   private List<MTableColumnStatistics> getMTableColumnStatistics(
       Table table, List<String> colNames) throws MetaException {
     boolean committed = false;

Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Wed
Sep  3 22:54:18 2014
@@ -552,8 +552,5 @@ public interface RawStore extends Config
 
   public AggrStats get_aggr_stats_for(String dbName, String tblName,
     List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
-
-  boolean updatePartitionColumnStatistics(
-      SetPartitionsStatsRequest request) throws NoSuchObjectException,
-      MetaException, InvalidObjectException, InvalidInputException;
+  
 }

Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
(original)
+++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
Wed Sep  3 22:54:18 2014
@@ -720,10 +720,4 @@ public class DummyRawStoreControlledComm
     return null;
   }
 
-  @Override
-  public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request)
-      throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
{
-    return objectStore.updatePartitionColumnStatistics(request);
-  }
-
 }

Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
(original)
+++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
Wed Sep  3 22:54:18 2014
@@ -736,12 +736,7 @@ public class DummyRawStoreForJdoConnecti
       throws MetaException {
     return null;
   }
-
-  @Override
-  public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request)
-      throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException
{
-    return false;
-  }
+  
 }
 
 

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Wed Sep
 3 22:54:18 2014
@@ -96,9 +96,6 @@ class WriterImpl implements Writer, Memo
   private static final int HDFS_BUFFER_SIZE = 256 * 1024;
   private static final int MIN_ROW_INDEX_STRIDE = 1000;
 
-  // HDFS requires blocks < 2GB and multiples of 512, so pick 1.5GB
-  private static final long MAX_BLOCK_SIZE = 1536 * 1024 * 1024;
-
   // threshold above which buffer size will be automatically resized
   private static final int COLUMN_COUNT_THRESHOLD = 1000;
 
@@ -135,8 +132,6 @@ class WriterImpl implements Writer, Memo
     new TreeMap<String, ByteString>();
   private final StreamFactory streamFactory = new StreamFactory();
   private final TreeWriter treeWriter;
-  private final OrcProto.RowIndex.Builder rowIndex =
-      OrcProto.RowIndex.newBuilder();
   private final boolean buildIndex;
   private final MemoryManager memoryManager;
   private final OrcFile.Version version;
@@ -678,7 +673,7 @@ class WriterImpl implements Writer, Memo
       if (rowIndexStream != null) {
         if (rowIndex.getEntryCount() != requiredIndexEntries) {
           throw new IllegalArgumentException("Column has wrong number of " +
-               "index entries found: " + rowIndexEntry + " expected: " +
+               "index entries found: " + rowIndex.getEntryCount() + " expected: " +
                requiredIndexEntries);
         }
         rowIndex.build().writeTo(rowIndexStream);
@@ -1005,6 +1000,8 @@ class WriterImpl implements Writer, Memo
     private final float dictionaryKeySizeThreshold;
     private boolean useDictionaryEncoding = true;
     private boolean isDirectV2 = true;
+    private boolean doneDictionaryCheck;
+    private final boolean strideDictionaryCheck;
 
     StringTreeWriter(int columnId,
                      ObjectInspector inspector,
@@ -1025,9 +1022,14 @@ class WriterImpl implements Writer, Memo
       directLengthOutput = createIntegerWriter(writer.createStream(id,
           OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
       dictionaryKeySizeThreshold = writer.getConfiguration().getFloat(
-        HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname,
-        HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.
-          defaultFloatVal);
+          HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname,
+          HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.
+              defaultFloatVal);
+      strideDictionaryCheck = writer.getConfiguration().getBoolean(
+          HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname,
+          HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.
+            defaultBoolVal);
+      doneDictionaryCheck = false;
     }
 
     /**
@@ -1045,21 +1047,71 @@ class WriterImpl implements Writer, Memo
       super.write(obj);
       if (obj != null) {
         Text val = getTextValue(obj);
-        rows.add(dictionary.add(val));
+        if (useDictionaryEncoding || !strideDictionaryCheck) {
+          rows.add(dictionary.add(val));
+        } else {
+          // write data and length
+          directStreamOutput.write(val.getBytes(), 0, val.getLength());
+          directLengthOutput.write(val.getLength());
+        }
         indexStatistics.updateString(val);
       }
     }
 
+    private boolean checkDictionaryEncoding() {
+      if (!doneDictionaryCheck) {
+        // Set the flag indicating whether or not to use dictionary encoding
+        // based on whether or not the fraction of distinct keys over number of
+        // non-null rows is less than the configured threshold
+        float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f;
+        useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold;
+        doneDictionaryCheck = true;
+      }
+      return useDictionaryEncoding;
+    }
+
     @Override
     void writeStripe(OrcProto.StripeFooter.Builder builder,
                      int requiredIndexEntries) throws IOException {
-      // Set the flag indicating whether or not to use dictionary encoding
-      // based on whether or not the fraction of distinct keys over number of
-      // non-null rows is less than the configured threshold
-      useDictionaryEncoding =
-        (!isDirectV2) || (rows.size() > 0 &&
-                          (float)(dictionary.size()) / rows.size() <=
-                            dictionaryKeySizeThreshold);
+      // if rows in stripe is less than dictionaryCheckAfterRows, dictionary
+      // checking would not have happened. So do it again here.
+      checkDictionaryEncoding();
+
+      if (useDictionaryEncoding) {
+        flushDictionary();
+      } else {
+        // flushout any left over entries from dictionary
+        if (rows.size() > 0) {
+          flushDictionary();
+        }
+
+        // suppress the stream for every stripe if dictionary is disabled
+        stringOutput.suppress();
+      }
+
+      // we need to build the rowindex before calling super, since it
+      // writes it out.
+      super.writeStripe(builder, requiredIndexEntries);
+      stringOutput.flush();
+      lengthOutput.flush();
+      rowOutput.flush();
+      directStreamOutput.flush();
+      directLengthOutput.flush();
+      // reset all of the fields to be ready for the next stripe.
+      dictionary.clear();
+      savedRowIndex.clear();
+      rowIndexValueCount.clear();
+      recordPosition(rowIndexPosition);
+      rowIndexValueCount.add(0L);
+
+      if (!useDictionaryEncoding) {
+        // record the start positions of first index stride of next stripe i.e
+        // beginning of the direct streams when dictionary is disabled
+        recordDirectStreamPosition();
+      }
+    }
+
+    private void flushDictionary() throws IOException {
       final int[] dumpOrder = new int[dictionary.size()];
 
       if (useDictionaryEncoding) {
@@ -1113,21 +1165,7 @@ class WriterImpl implements Writer, Memo
           }
         }
       }
-      // we need to build the rowindex before calling super, since it
-      // writes it out.
-      super.writeStripe(builder, requiredIndexEntries);
-      stringOutput.flush();
-      lengthOutput.flush();
-      rowOutput.flush();
-      directStreamOutput.flush();
-      directLengthOutput.flush();
-      // reset all of the fields to be ready for the next stripe.
-      dictionary.clear();
       rows.clear();
-      savedRowIndex.clear();
-      rowIndexValueCount.clear();
-      recordPosition(rowIndexPosition);
-      rowIndexValueCount.add(0L);
     }
 
     @Override
@@ -1165,10 +1203,30 @@ class WriterImpl implements Writer, Memo
       OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
       rowIndexEntry.setStatistics(indexStatistics.serialize());
       indexStatistics.reset();
-      savedRowIndex.add(rowIndexEntry.build());
+      OrcProto.RowIndexEntry base = rowIndexEntry.build();
+      savedRowIndex.add(base);
       rowIndexEntry.clear();
       recordPosition(rowIndexPosition);
       rowIndexValueCount.add(Long.valueOf(rows.size()));
+      if (strideDictionaryCheck) {
+        checkDictionaryEncoding();
+      }
+      if (!useDictionaryEncoding) {
+        if (rows.size() > 0) {
+          flushDictionary();
+          // just record the start positions of next index stride
+          recordDirectStreamPosition();
+        } else {
+          // record the start positions of next index stride
+          recordDirectStreamPosition();
+          getRowIndex().addEntry(base);
+        }
+      }
+    }
+
+    private void recordDirectStreamPosition() throws IOException {
+      directStreamOutput.getPosition(rowIndexPosition);
+      directLengthOutput.getPosition(rowIndexPosition);
     }
 
     @Override

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Wed Sep  3
22:54:18 2014
@@ -109,6 +109,7 @@ import org.apache.hadoop.hive.shims.Hado
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.thrift.TException;
 
 import com.google.common.collect.Sets;
@@ -128,6 +129,7 @@ public class Hive {
 
   private HiveConf conf = null;
   private IMetaStoreClient metaStoreClient;
+  private UserGroupInformation owner;
 
   private static ThreadLocal<Hive> hiveDB = new ThreadLocal<Hive>() {
     @Override
@@ -181,7 +183,11 @@ public class Hive {
    */
   public static Hive get(HiveConf c, boolean needsRefresh) throws HiveException {
     Hive db = hiveDB.get();
-    if (db == null || needsRefresh) {
+    if (db == null || needsRefresh || !db.isCurrentUserOwner()) {
+      if (db != null) {
+        LOG.debug("Creating new db. db = " + db + ", needsRefresh = " + needsRefresh +
+          ", db.isCurrentUserOwner = " + db.isCurrentUserOwner());
+      }
       closeCurrent();
       c.set("fs.scheme.class", "dfs");
       Hive newdb = new Hive(c);
@@ -194,6 +200,11 @@ public class Hive {
 
   public static Hive get() throws HiveException {
     Hive db = hiveDB.get();
+    if (db != null && !db.isCurrentUserOwner()) {
+      LOG.debug("Creating new db. db.isCurrentUserOwner = " + db.isCurrentUserOwner());
+      db.close();
+      db = null;
+    }
     if (db == null) {
       SessionState session = SessionState.get();
       db = new Hive(session == null ? new HiveConf(Hive.class) : session.getConf());
@@ -220,6 +231,17 @@ public class Hive {
     conf = c;
   }
 
+
+  private boolean isCurrentUserOwner() throws HiveException {
+    try {
+      return owner == null || owner.equals(UserGroupInformation.getCurrentUser());
+    } catch(IOException e) {
+      throw new HiveException("Error getting current user: " + e.getMessage(), e);
+    }
+  }
+
+
+
   /**
    * closes the connection to metastore for the calling thread
    */
@@ -2496,6 +2518,13 @@ private void constructOneLBLocationMap(F
   @Unstable
   public IMetaStoreClient getMSC() throws MetaException {
     if (metaStoreClient == null) {
+      try {
+        owner = UserGroupInformation.getCurrentUser();
+      } catch(IOException e) {
+        String msg = "Error getting current user: " + e.getMessage();
+        LOG.error(msg, e);
+        throw new MetaException(msg + "\n" + StringUtils.stringifyException(e));
+      }
       metaStoreClient = createMetaStoreClient();
     }
     return metaStoreClient;

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java Wed Sep
 3 22:54:18 2014
@@ -32,6 +32,7 @@ public class ColStatistics {
   private double avgColLen;
   private long numTrues;
   private long numFalses;
+  private Range range;
 
   public ColStatistics(String tabAlias, String colName, String colType) {
     this.setTableAlias(tabAlias);
@@ -118,6 +119,17 @@ public class ColStatistics {
     this.numFalses = numFalses;
   }
 
+  public Range getRange() {
+    return range;
+  }
+
+  public void setRange(Number minVal, Number maxVal) {
+    this.range = new Range(minVal, maxVal);
+  }
+
+  public void setRange(Range r) {
+    this.range = r;
+  }
 
   @Override
   public String toString() {
@@ -150,7 +162,24 @@ public class ColStatistics {
     clone.setNumNulls(numNulls);
     clone.setNumTrues(numTrues);
     clone.setNumFalses(numFalses);
+    if (range != null ) {
+      clone.setRange(range.clone());
+    }
     return clone;
   }
 
+  public static class Range {
+    public final Number minValue;
+    public final Number maxValue;
+    Range(Number minValue, Number maxValue) {
+      super();
+      this.minValue = minValue;
+      this.maxValue = maxValue;
+    }
+    @Override
+    public Range clone() {
+      return new Range(minValue, maxValue);
+    }
+  }
+
 }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Wed Sep
 3 22:54:18 2014
@@ -25,10 +25,12 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -76,6 +78,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
 
+import java.math.BigDecimal;
+import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -404,18 +408,22 @@ public class StatsUtils {
       cs.setCountDistint(csd.getLongStats().getNumDVs());
       cs.setNumNulls(csd.getLongStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive1());
+      cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
     } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
       cs.setCountDistint(csd.getLongStats().getNumDVs());
       cs.setNumNulls(csd.getLongStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive2());
+      cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
     } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
       cs.setCountDistint(csd.getDoubleStats().getNumDVs());
       cs.setNumNulls(csd.getDoubleStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive1());
+      cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
     } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
       cs.setCountDistint(csd.getDoubleStats().getNumDVs());
       cs.setNumNulls(csd.getDoubleStats().getNumNulls());
       cs.setAvgColLen(JavaDataModel.get().primitive2());
+      cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
     } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
         || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
         || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
@@ -441,6 +449,13 @@ public class StatsUtils {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
       cs.setCountDistint(csd.getDecimalStats().getNumDVs());
       cs.setNumNulls(csd.getDecimalStats().getNumNulls());
+      Decimal val = csd.getDecimalStats().getHighValue();
+      BigDecimal maxVal = HiveDecimal.
+          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
+      val = csd.getDecimalStats().getLowValue();
+      BigDecimal minVal = HiveDecimal.
+          create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
+      cs.setRange(minVal, maxVal);
     } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
     } else {

Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Wed Sep
 3 22:54:18 2014
@@ -42,6 +42,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -1684,7 +1685,7 @@ public class TestOrcFile {
   }
 
   @Test
-  public void testMemoryManagement() throws Exception {
+  public void testMemoryManagementV11() throws Exception {
     ObjectInspector inspector;
     synchronized (TestOrcFile.class) {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
@@ -1699,7 +1700,8 @@ public class TestOrcFile {
                                          .stripeSize(50000)
                                          .bufferSize(100)
                                          .rowIndexStride(0)
-                                         .memory(memory));
+                                         .memory(memory)
+                                         .version(Version.V_0_11));
     assertEquals(testFilePath, memory.path);
     for(int i=0; i < 2500; ++i) {
       writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
@@ -1719,6 +1721,45 @@ public class TestOrcFile {
   }
 
   @Test
+  public void testMemoryManagementV12() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (InnerStruct.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
+    Writer writer = OrcFile.createWriter(testFilePath,
+                                         OrcFile.writerOptions(conf)
+                                         .inspector(inspector)
+                                         .compress(CompressionKind.NONE)
+                                         .stripeSize(50000)
+                                         .bufferSize(100)
+                                         .rowIndexStride(0)
+                                         .memory(memory)
+                                         .version(Version.V_0_12));
+    assertEquals(testFilePath, memory.path);
+    for(int i=0; i < 2500; ++i) {
+      writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
+    }
+    writer.close();
+    assertEquals(null, memory.path);
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    int i = 0;
+    for(StripeInformation stripe: reader.getStripes()) {
+      i += 1;
+      assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
+          stripe.getDataLength() < 5000);
+    }
+    // with HIVE-7832, the dictionaries will be disabled after writing the first
+    // stripe as there are too many distinct values. Hence only 3 stripes as
+    // compared to 25 stripes in version 0.11 (above test case)
+    assertEquals(3, i);
+    assertEquals(2500, reader.getNumberOfRows());
+  }
+
+  @Test
   public void testPredicatePushdown() throws Exception {
     ObjectInspector inspector;
     synchronized (TestOrcFile.class) {

Modified: hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java
(original)
+++ hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java
Wed Sep  3 22:54:18 2014
@@ -24,6 +24,7 @@ import java.net.InetAddress;
 import java.net.Socket;
 import java.security.PrivilegedAction;
 import java.security.PrivilegedExceptionAction;
+import java.util.Locale;
 import java.util.Map;
 
 import javax.security.auth.callback.Callback;
@@ -79,11 +80,23 @@ public class HadoopThriftAuthBridge20S e
   }
 
   @Override
-  public Client createClientWithConf(String authType) {
-    Configuration conf = new Configuration();
-    conf.set(HADOOP_SECURITY_AUTHENTICATION, authType);
-    UserGroupInformation.setConfiguration(conf);
-    return new Client();
+  public Client createClientWithConf(String authMethod) {
+    UserGroupInformation ugi;
+    try {
+      ugi = UserGroupInformation.getLoginUser();
+    } catch(IOException e) {
+      throw new IllegalStateException("Unable to get current login user: " + e, e);
+    }
+    if (loginUserHasCurrentAuthMethod(ugi, authMethod)) {
+      LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current.");
+      return new Client();
+    } else {
+      LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current.");
+      Configuration conf = new Configuration();
+      conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod);
+      UserGroupInformation.setConfiguration(conf);
+      return new Client();
+    }
   }
 
   @Override
@@ -105,15 +118,48 @@ public class HadoopThriftAuthBridge20S e
   }
 
   @Override
-  public UserGroupInformation getCurrentUGIWithConf(String authType)
+  public UserGroupInformation getCurrentUGIWithConf(String authMethod)
       throws IOException {
-    Configuration conf = new Configuration();
-    conf.set(HADOOP_SECURITY_AUTHENTICATION, authType);
-    UserGroupInformation.setConfiguration(conf);
-    return UserGroupInformation.getCurrentUser();
+    UserGroupInformation ugi;
+    try {
+      ugi = UserGroupInformation.getCurrentUser();
+    } catch(IOException e) {
+      throw new IllegalStateException("Unable to get current user: " + e, e);
+    }
+    if (loginUserHasCurrentAuthMethod(ugi, authMethod)) {
+      LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current.");
+      return ugi;
+    } else {
+      LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current.");
+      Configuration conf = new Configuration();
+      conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod);
+      UserGroupInformation.setConfiguration(conf);
+      return UserGroupInformation.getCurrentUser();
+    }
   }
 
   /**
+   * Return true if the current login user is already using the given authMethod.
+   *
+   * Used above to ensure we do not create a new Configuration object and as such
+   * lose other settings such as the cluster to which the JVM is connected. Required
+   * for oozie since it does not have a core-site.xml see HIVE-7682
+   */
+  private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod)
{
+    AuthenticationMethod authMethod;
+    try {
+      // based on SecurityUtil.getAuthenticationMethod()
+      authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH));
+    } catch (IllegalArgumentException iae) {
+      throw new IllegalArgumentException("Invalid attribute value for " +
+          HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae);
+    }
+    LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod());
+    return ugi.getAuthenticationMethod().equals(authMethod);
+  }
+
+
+  /**
    * Read and return Hadoop SASL configuration which can be configured using
    * "hadoop.rpc.protection"
    * @param conf



Mime
View raw message