Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2676311A61 for ; Wed, 3 Sep 2014 22:54:43 +0000 (UTC) Received: (qmail 89603 invoked by uid 500); 3 Sep 2014 22:54:43 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 89574 invoked by uid 500); 3 Sep 2014 22:54:43 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 89563 invoked by uid 99); 3 Sep 2014 22:54:42 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 03 Sep 2014 22:54:42 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 03 Sep 2014 22:54:40 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id CBB38238890D; Wed, 3 Sep 2014 22:54:19 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1622374 - in /hive/branches/tez: ./ common/src/java/org/apache/hadoop/hive/conf/ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/ metastore/src/java/org/apache/hadoop/hive/metastore/ metastore/src/test/org/apache/h... Date: Wed, 03 Sep 2014 22:54:19 -0000 To: commits@hive.apache.org From: gunther@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140903225419.CBB38238890D@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: gunther Date: Wed Sep 3 22:54:18 2014 New Revision: 1622374 URL: http://svn.apache.org/r1622374 Log: Merge latest trunk into branch. (Gunther Hagleitner) Added: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java - copied unchanged from r1622373, hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java Modified: hive/branches/tez/ (props changed) hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java Propchange: hive/branches/tez/ ------------------------------------------------------------------------------ Merged /hive/trunk:r1622081-1622373 Modified: hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java URL: http://svn.apache.org/viewvc/hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original) +++ hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Sep 3 22:54:18 2014 @@ -835,6 +835,10 @@ public class HiveConf extends Configurat "If the number of keys in a dictionary is greater than this fraction of the total number of\n" + "non-null rows, turn off dictionary encoding. Use 1 to always use dictionary encoding."), HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride", 10000, "Define the default ORC index stride"), + HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK("hive.orc.row.index.stride.dictionary.check", true, + "If enabled dictionary check will happen after first row index stride (default 10000 rows)\n" + + "else dictionary check will happen before writing first stripe. In both cases, the decision\n" + + "to use dictionary or not will be retained thereafter."), HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024, "Define the default ORC buffer size"), HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true, "Define the default block padding"), HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f, Modified: hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java URL: http://svn.apache.org/viewvc/hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java (original) +++ hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java Wed Sep 3 22:54:18 2014 @@ -199,7 +199,8 @@ public class HCatLoader extends HCatBase throws IOException { Table table = phutil.getTable(location, hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); + PigHCatUtil.getHCatServerPrincipal(job), + job); // Pass job to initialize metastore conf overrides List tablePartitionKeys = table.getPartitionKeys(); String[] partitionKeys = new String[tablePartitionKeys.size()]; for (int i = 0; i < tablePartitionKeys.size(); i++) { @@ -215,7 +216,11 @@ public class HCatLoader extends HCatBase Table table = phutil.getTable(location, hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); + PigHCatUtil.getHCatServerPrincipal(job), + + // Pass job to initialize metastore conf overrides for embedded metastore case + // (hive.metastore.uris = ""). + job); HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table); try { PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema); Modified: hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java URL: http://svn.apache.org/viewvc/hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java (original) +++ hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java Wed Sep 3 22:54:18 2014 @@ -142,8 +142,16 @@ class PigHCatUtil { } private static HiveMetaStoreClient getHiveMetaClient(String serverUri, - String serverKerberosPrincipal, Class clazz) throws Exception { - HiveConf hiveConf = new HiveConf(clazz); + String serverKerberosPrincipal, + Class clazz, + Job job) throws Exception { + + // The job configuration is passed in so the configuration will be cloned + // from the pig job configuration. This is necessary for overriding + // metastore configuration arguments like the metastore jdbc connection string + // and password, in the case of an embedded metastore, which you get when + // hive.metastore.uris = "". + HiveConf hiveConf = new HiveConf(job.getConfiguration(), clazz); if (serverUri != null) { hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim()); @@ -178,7 +186,13 @@ class PigHCatUtil { return new HCatSchema(fcols); } - public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException { + /* + * The job argument is passed so that configuration overrides can be used to initialize + * the metastore configuration in the special case of an embedded metastore + * (hive.metastore.uris = ""). + */ + public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal, + Job job) throws IOException { Pair loc_server = new Pair(location, hcatServerUri); Table hcatTable = hcatTableCache.get(loc_server); if (hcatTable != null) { @@ -191,7 +205,7 @@ class PigHCatUtil { Table table = null; HiveMetaStoreClient client = null; try { - client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class); + client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class, job); table = HCatUtil.getTable(client, dbName, tableName); } catch (NoSuchObjectException nsoe) { throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE); // prettier error messages to frontend Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Wed Sep 3 22:54:18 2014 @@ -3721,19 +3721,6 @@ public class HiveMetaStore extends Thrif endFunction("write_partition_column_statistics: ", ret != false, null, tableName); } } - public boolean update_partition_column_statistics( - SetPartitionsStatsRequest request) throws NoSuchObjectException, - InvalidObjectException, MetaException, TException, - InvalidInputException { - boolean ret = false; - try { - ret = getMS().updatePartitionColumnStatistics(request); - return ret; - } finally { - endFunction("write_partition_column_statistics: ", ret != false, null, - null); - } - } @Override public boolean delete_partition_column_statistics(String dbName, String tableName, @@ -5058,9 +5045,13 @@ public class HiveMetaStore extends Thrif @Override public boolean set_aggr_stats_for(SetPartitionsStatsRequest request) - throws NoSuchObjectException, InvalidObjectException, MetaException, - InvalidInputException, TException { - return update_partition_column_statistics(request); + throws NoSuchObjectException, InvalidObjectException, MetaException, InvalidInputException, + TException { + boolean ret = true; + for (ColumnStatistics colStats : request.getColStats()) { + ret = ret && update_partition_column_statistics(colStats); + } + return ret; } } Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Wed Sep 3 22:54:18 2014 @@ -5779,34 +5779,6 @@ public class ObjectStore implements RawS } } - @Override - public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request) - throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - boolean committed = false; - try { - openTransaction(); - for (ColumnStatistics colStats : request.getColStats()) { - ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); - statsDesc.setDbName(statsDesc.getDbName().toLowerCase()); - statsDesc.setTableName(statsDesc.getTableName().toLowerCase()); - List statsObjs = colStats.getStatsObj(); - for (ColumnStatisticsObj statsObj : statsObjs) { - statsObj.setColName(statsObj.getColName().toLowerCase()); - statsObj.setColType(statsObj.getColType().toLowerCase()); - MPartitionColumnStatistics mStatsObj = StatObjectConverter - .convertToMPartitionColumnStatistics(null, statsDesc, statsObj); - pm.makePersistent(mStatsObj); - } - } - committed = commitTransaction(); - return committed; - } finally { - if (!committed) { - rollbackTransaction(); - } - } - } - private List getMTableColumnStatistics( Table table, List colNames) throws MetaException { boolean committed = false; Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original) +++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Wed Sep 3 22:54:18 2014 @@ -552,8 +552,5 @@ public interface RawStore extends Config public AggrStats get_aggr_stats_for(String dbName, String tblName, List partNames, List colNames) throws MetaException, NoSuchObjectException; - - boolean updatePartitionColumnStatistics( - SetPartitionsStatsRequest request) throws NoSuchObjectException, - MetaException, InvalidObjectException, InvalidInputException; + } Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original) +++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Wed Sep 3 22:54:18 2014 @@ -720,10 +720,4 @@ public class DummyRawStoreControlledComm return null; } - @Override - public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request) - throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - return objectStore.updatePartitionColumnStatistics(request); - } - } Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original) +++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Wed Sep 3 22:54:18 2014 @@ -736,12 +736,7 @@ public class DummyRawStoreForJdoConnecti throws MetaException { return null; } - - @Override - public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request) - throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { - return false; - } + } Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Wed Sep 3 22:54:18 2014 @@ -96,9 +96,6 @@ class WriterImpl implements Writer, Memo private static final int HDFS_BUFFER_SIZE = 256 * 1024; private static final int MIN_ROW_INDEX_STRIDE = 1000; - // HDFS requires blocks < 2GB and multiples of 512, so pick 1.5GB - private static final long MAX_BLOCK_SIZE = 1536 * 1024 * 1024; - // threshold above which buffer size will be automatically resized private static final int COLUMN_COUNT_THRESHOLD = 1000; @@ -135,8 +132,6 @@ class WriterImpl implements Writer, Memo new TreeMap(); private final StreamFactory streamFactory = new StreamFactory(); private final TreeWriter treeWriter; - private final OrcProto.RowIndex.Builder rowIndex = - OrcProto.RowIndex.newBuilder(); private final boolean buildIndex; private final MemoryManager memoryManager; private final OrcFile.Version version; @@ -678,7 +673,7 @@ class WriterImpl implements Writer, Memo if (rowIndexStream != null) { if (rowIndex.getEntryCount() != requiredIndexEntries) { throw new IllegalArgumentException("Column has wrong number of " + - "index entries found: " + rowIndexEntry + " expected: " + + "index entries found: " + rowIndex.getEntryCount() + " expected: " + requiredIndexEntries); } rowIndex.build().writeTo(rowIndexStream); @@ -1005,6 +1000,8 @@ class WriterImpl implements Writer, Memo private final float dictionaryKeySizeThreshold; private boolean useDictionaryEncoding = true; private boolean isDirectV2 = true; + private boolean doneDictionaryCheck; + private final boolean strideDictionaryCheck; StringTreeWriter(int columnId, ObjectInspector inspector, @@ -1025,9 +1022,14 @@ class WriterImpl implements Writer, Memo directLengthOutput = createIntegerWriter(writer.createStream(id, OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer); dictionaryKeySizeThreshold = writer.getConfiguration().getFloat( - HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, - HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD. - defaultFloatVal); + HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, + HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD. + defaultFloatVal); + strideDictionaryCheck = writer.getConfiguration().getBoolean( + HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname, + HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK. + defaultBoolVal); + doneDictionaryCheck = false; } /** @@ -1045,21 +1047,71 @@ class WriterImpl implements Writer, Memo super.write(obj); if (obj != null) { Text val = getTextValue(obj); - rows.add(dictionary.add(val)); + if (useDictionaryEncoding || !strideDictionaryCheck) { + rows.add(dictionary.add(val)); + } else { + // write data and length + directStreamOutput.write(val.getBytes(), 0, val.getLength()); + directLengthOutput.write(val.getLength()); + } indexStatistics.updateString(val); } } + private boolean checkDictionaryEncoding() { + if (!doneDictionaryCheck) { + // Set the flag indicating whether or not to use dictionary encoding + // based on whether or not the fraction of distinct keys over number of + // non-null rows is less than the configured threshold + float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f; + useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold; + doneDictionaryCheck = true; + } + return useDictionaryEncoding; + } + @Override void writeStripe(OrcProto.StripeFooter.Builder builder, int requiredIndexEntries) throws IOException { - // Set the flag indicating whether or not to use dictionary encoding - // based on whether or not the fraction of distinct keys over number of - // non-null rows is less than the configured threshold - useDictionaryEncoding = - (!isDirectV2) || (rows.size() > 0 && - (float)(dictionary.size()) / rows.size() <= - dictionaryKeySizeThreshold); + // if rows in stripe is less than dictionaryCheckAfterRows, dictionary + // checking would not have happened. So do it again here. + checkDictionaryEncoding(); + + if (useDictionaryEncoding) { + flushDictionary(); + } else { + // flushout any left over entries from dictionary + if (rows.size() > 0) { + flushDictionary(); + } + + // suppress the stream for every stripe if dictionary is disabled + stringOutput.suppress(); + } + + // we need to build the rowindex before calling super, since it + // writes it out. + super.writeStripe(builder, requiredIndexEntries); + stringOutput.flush(); + lengthOutput.flush(); + rowOutput.flush(); + directStreamOutput.flush(); + directLengthOutput.flush(); + // reset all of the fields to be ready for the next stripe. + dictionary.clear(); + savedRowIndex.clear(); + rowIndexValueCount.clear(); + recordPosition(rowIndexPosition); + rowIndexValueCount.add(0L); + + if (!useDictionaryEncoding) { + // record the start positions of first index stride of next stripe i.e + // beginning of the direct streams when dictionary is disabled + recordDirectStreamPosition(); + } + } + + private void flushDictionary() throws IOException { final int[] dumpOrder = new int[dictionary.size()]; if (useDictionaryEncoding) { @@ -1113,21 +1165,7 @@ class WriterImpl implements Writer, Memo } } } - // we need to build the rowindex before calling super, since it - // writes it out. - super.writeStripe(builder, requiredIndexEntries); - stringOutput.flush(); - lengthOutput.flush(); - rowOutput.flush(); - directStreamOutput.flush(); - directLengthOutput.flush(); - // reset all of the fields to be ready for the next stripe. - dictionary.clear(); rows.clear(); - savedRowIndex.clear(); - rowIndexValueCount.clear(); - recordPosition(rowIndexPosition); - rowIndexValueCount.add(0L); } @Override @@ -1165,10 +1203,30 @@ class WriterImpl implements Writer, Memo OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry(); rowIndexEntry.setStatistics(indexStatistics.serialize()); indexStatistics.reset(); - savedRowIndex.add(rowIndexEntry.build()); + OrcProto.RowIndexEntry base = rowIndexEntry.build(); + savedRowIndex.add(base); rowIndexEntry.clear(); recordPosition(rowIndexPosition); rowIndexValueCount.add(Long.valueOf(rows.size())); + if (strideDictionaryCheck) { + checkDictionaryEncoding(); + } + if (!useDictionaryEncoding) { + if (rows.size() > 0) { + flushDictionary(); + // just record the start positions of next index stride + recordDirectStreamPosition(); + } else { + // record the start positions of next index stride + recordDirectStreamPosition(); + getRowIndex().addEntry(base); + } + } + } + + private void recordDirectStreamPosition() throws IOException { + directStreamOutput.getPosition(rowIndexPosition); + directLengthOutput.getPosition(rowIndexPosition); } @Override Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Wed Sep 3 22:54:18 2014 @@ -109,6 +109,7 @@ import org.apache.hadoop.hive.shims.Hado import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TException; import com.google.common.collect.Sets; @@ -128,6 +129,7 @@ public class Hive { private HiveConf conf = null; private IMetaStoreClient metaStoreClient; + private UserGroupInformation owner; private static ThreadLocal hiveDB = new ThreadLocal() { @Override @@ -181,7 +183,11 @@ public class Hive { */ public static Hive get(HiveConf c, boolean needsRefresh) throws HiveException { Hive db = hiveDB.get(); - if (db == null || needsRefresh) { + if (db == null || needsRefresh || !db.isCurrentUserOwner()) { + if (db != null) { + LOG.debug("Creating new db. db = " + db + ", needsRefresh = " + needsRefresh + + ", db.isCurrentUserOwner = " + db.isCurrentUserOwner()); + } closeCurrent(); c.set("fs.scheme.class", "dfs"); Hive newdb = new Hive(c); @@ -194,6 +200,11 @@ public class Hive { public static Hive get() throws HiveException { Hive db = hiveDB.get(); + if (db != null && !db.isCurrentUserOwner()) { + LOG.debug("Creating new db. db.isCurrentUserOwner = " + db.isCurrentUserOwner()); + db.close(); + db = null; + } if (db == null) { SessionState session = SessionState.get(); db = new Hive(session == null ? new HiveConf(Hive.class) : session.getConf()); @@ -220,6 +231,17 @@ public class Hive { conf = c; } + + private boolean isCurrentUserOwner() throws HiveException { + try { + return owner == null || owner.equals(UserGroupInformation.getCurrentUser()); + } catch(IOException e) { + throw new HiveException("Error getting current user: " + e.getMessage(), e); + } + } + + + /** * closes the connection to metastore for the calling thread */ @@ -2496,6 +2518,13 @@ private void constructOneLBLocationMap(F @Unstable public IMetaStoreClient getMSC() throws MetaException { if (metaStoreClient == null) { + try { + owner = UserGroupInformation.getCurrentUser(); + } catch(IOException e) { + String msg = "Error getting current user: " + e.getMessage(); + LOG.error(msg, e); + throw new MetaException(msg + "\n" + StringUtils.stringifyException(e)); + } metaStoreClient = createMetaStoreClient(); } return metaStoreClient; Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java Wed Sep 3 22:54:18 2014 @@ -32,6 +32,7 @@ public class ColStatistics { private double avgColLen; private long numTrues; private long numFalses; + private Range range; public ColStatistics(String tabAlias, String colName, String colType) { this.setTableAlias(tabAlias); @@ -118,6 +119,17 @@ public class ColStatistics { this.numFalses = numFalses; } + public Range getRange() { + return range; + } + + public void setRange(Number minVal, Number maxVal) { + this.range = new Range(minVal, maxVal); + } + + public void setRange(Range r) { + this.range = r; + } @Override public String toString() { @@ -150,7 +162,24 @@ public class ColStatistics { clone.setNumNulls(numNulls); clone.setNumTrues(numTrues); clone.setNumFalses(numFalses); + if (range != null ) { + clone.setRange(range.clone()); + } return clone; } + public static class Range { + public final Number minValue; + public final Number maxValue; + Range(Number minValue, Number maxValue) { + super(); + this.minValue = minValue; + this.maxValue = maxValue; + } + @Override + public Range clone() { + return new Range(minValue, maxValue); + } + } + } Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original) +++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Wed Sep 3 22:54:18 2014 @@ -25,10 +25,12 @@ import org.apache.commons.logging.LogFac import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -76,6 +78,8 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; import org.apache.hadoop.io.BytesWritable; +import java.math.BigDecimal; +import java.math.BigInteger; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -404,18 +408,22 @@ public class StatsUtils { cs.setCountDistint(csd.getLongStats().getNumDVs()); cs.setNumNulls(csd.getLongStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive1()); + cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { cs.setCountDistint(csd.getLongStats().getNumDVs()); cs.setNumNulls(csd.getLongStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive2()); + cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { cs.setCountDistint(csd.getDoubleStats().getNumDVs()); cs.setNumNulls(csd.getDoubleStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive1()); + cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { cs.setCountDistint(csd.getDoubleStats().getNumDVs()); cs.setNumNulls(csd.getDoubleStats().getNumNulls()); cs.setAvgColLen(JavaDataModel.get().primitive2()); + cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) || colType.startsWith(serdeConstants.CHAR_TYPE_NAME) || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { @@ -441,6 +449,13 @@ public class StatsUtils { cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); cs.setCountDistint(csd.getDecimalStats().getNumDVs()); cs.setNumNulls(csd.getDecimalStats().getNumNulls()); + Decimal val = csd.getDecimalStats().getHighValue(); + BigDecimal maxVal = HiveDecimal. + create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue(); + val = csd.getDecimalStats().getLowValue(); + BigDecimal minVal = HiveDecimal. + create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue(); + cs.setRange(minVal, maxVal); } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); } else { Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original) +++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Wed Sep 3 22:54:18 2014 @@ -42,6 +42,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -1684,7 +1685,7 @@ public class TestOrcFile { } @Test - public void testMemoryManagement() throws Exception { + public void testMemoryManagementV11() throws Exception { ObjectInspector inspector; synchronized (TestOrcFile.class) { inspector = ObjectInspectorFactory.getReflectionObjectInspector @@ -1699,7 +1700,8 @@ public class TestOrcFile { .stripeSize(50000) .bufferSize(100) .rowIndexStride(0) - .memory(memory)); + .memory(memory) + .version(Version.V_0_11)); assertEquals(testFilePath, memory.path); for(int i=0; i < 2500; ++i) { writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i))); @@ -1719,6 +1721,45 @@ public class TestOrcFile { } @Test + public void testMemoryManagementV12() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector + (InnerStruct.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .inspector(inspector) + .compress(CompressionKind.NONE) + .stripeSize(50000) + .bufferSize(100) + .rowIndexStride(0) + .memory(memory) + .version(Version.V_0_12)); + assertEquals(testFilePath, memory.path); + for(int i=0; i < 2500; ++i) { + writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i))); + } + writer.close(); + assertEquals(null, memory.path); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + int i = 0; + for(StripeInformation stripe: reader.getStripes()) { + i += 1; + assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(), + stripe.getDataLength() < 5000); + } + // with HIVE-7832, the dictionaries will be disabled after writing the first + // stripe as there are too many distinct values. Hence only 3 stripes as + // compared to 25 stripes in version 0.11 (above test case) + assertEquals(3, i); + assertEquals(2500, reader.getNumberOfRows()); + } + + @Test public void testPredicatePushdown() throws Exception { ObjectInspector inspector; synchronized (TestOrcFile.class) { Modified: hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java URL: http://svn.apache.org/viewvc/hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java?rev=1622374&r1=1622373&r2=1622374&view=diff ============================================================================== --- hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java (original) +++ hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java Wed Sep 3 22:54:18 2014 @@ -24,6 +24,7 @@ import java.net.InetAddress; import java.net.Socket; import java.security.PrivilegedAction; import java.security.PrivilegedExceptionAction; +import java.util.Locale; import java.util.Map; import javax.security.auth.callback.Callback; @@ -79,11 +80,23 @@ public class HadoopThriftAuthBridge20S e } @Override - public Client createClientWithConf(String authType) { - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authType); - UserGroupInformation.setConfiguration(conf); - return new Client(); + public Client createClientWithConf(String authMethod) { + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getLoginUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current login user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return new Client(); + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return new Client(); + } } @Override @@ -105,15 +118,48 @@ public class HadoopThriftAuthBridge20S e } @Override - public UserGroupInformation getCurrentUGIWithConf(String authType) + public UserGroupInformation getCurrentUGIWithConf(String authMethod) throws IOException { - Configuration conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, authType); - UserGroupInformation.setConfiguration(conf); - return UserGroupInformation.getCurrentUser(); + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getCurrentUser(); + } catch(IOException e) { + throw new IllegalStateException("Unable to get current user: " + e, e); + } + if (loginUserHasCurrentAuthMethod(ugi, authMethod)) { + LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current."); + return ugi; + } else { + LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current."); + Configuration conf = new Configuration(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod); + UserGroupInformation.setConfiguration(conf); + return UserGroupInformation.getCurrentUser(); + } } /** + * Return true if the current login user is already using the given authMethod. + * + * Used above to ensure we do not create a new Configuration object and as such + * lose other settings such as the cluster to which the JVM is connected. Required + * for oozie since it does not have a core-site.xml see HIVE-7682 + */ + private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) { + AuthenticationMethod authMethod; + try { + // based on SecurityUtil.getAuthenticationMethod() + authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH)); + } catch (IllegalArgumentException iae) { + throw new IllegalArgumentException("Invalid attribute value for " + + HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae); + } + LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod()); + return ugi.getAuthenticationMethod().equals(authMethod); + } + + + /** * Read and return Hadoop SASL configuration which can be configured using * "hadoop.rpc.protection" * @param conf