From: mmccline@apache.org
To: commits@hive.apache.org
Reply-To: hive-dev@hive.apache.org
Mailing-List: contact commits-help@hive.apache.org; run by ezmlm
Date: Thu, 22 Jun 2017 23:41:09 -0000
Message-Id: <4e7aaf4d9fc940b79feb620d186d0751@git.apache.org>
In-Reply-To: <4007d28c78354ba28baf6a15d9ddb7d7@git.apache.org>
References: <4007d28c78354ba28baf6a15d9ddb7d7@git.apache.org>
Subject: [33/34] hive git commit: HIVE-16589: Vectorization: Support Complex Types and GroupBy modes PARTIAL2, FINAL, and COMPLETE for AVG, VARIANCE (Matt McCline, reviewed by Jason Dere)

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
new file mode 100644
index 0000000..da48a7c
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig
@@ -0,0 +1,4717 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.conf;
+
+import com.google.common.base.Joiner;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
+import org.apache.hadoop.hive.conf.Validator.PatternSet;
+import org.apache.hadoop.hive.conf.Validator.RangeValidator;
+import org.apache.hadoop.hive.conf.Validator.RatioValidator;
+import org.apache.hadoop.hive.conf.Validator.SizeValidator;
+import org.apache.hadoop.hive.conf.Validator.StringSet;
+import org.apache.hadoop.hive.conf.Validator.TimeValidator;
+import org.apache.hadoop.hive.conf.Validator.WritableDirectoryValidator;
+import org.apache.hadoop.hive.shims.Utils;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.Shell;
+import org.apache.hive.common.HiveCompat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.security.auth.login.LoginException;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Hive Configuration.
+ */
+public class HiveConf extends Configuration {
+  protected String hiveJar;
+  protected Properties origProp;
+  protected String auxJars;
+  private static final Logger l4j = LoggerFactory.getLogger(HiveConf.class);
+  private static boolean loadMetastoreConfig = false;
+  private static boolean loadHiveServer2Config = false;
+  private static URL hiveDefaultURL = null;
+  private static URL hiveSiteURL = null;
+  private static URL hivemetastoreSiteUrl = null;
+  private static URL hiveServer2SiteUrl = null;
+
+  private static byte[] confVarByteArray = null;
+
+
+  private static final Map<String, ConfVars> vars = new HashMap<String, ConfVars>();
+  private static final Map<String, ConfVars> metaConfs = new HashMap<String, ConfVars>();
+  private final List<String> restrictList = new ArrayList<String>();
+  private final Set<String> hiddenSet = new HashSet<String>();
+
+  private Pattern modWhiteListPattern = null;
+  private volatile boolean isSparkConfigUpdated = false;
+  private static final int LOG_PREFIX_LENGTH = 64;
+
+  public boolean getSparkConfigUpdated() {
+    return isSparkConfigUpdated;
+  }
+
+  public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
+    this.isSparkConfigUpdated = isSparkConfigUpdated;
+  }
+
+  public interface EncoderDecoder<K, V> {
+    V encode(K key);
+    K decode(V value);
+  }
+
+  public static class URLEncoderDecoder implements EncoderDecoder<String, String> {
+    private static final String UTF_8 = "UTF-8";
+    @Override
+    public String encode(String key) {
+      try {
+        return URLEncoder.encode(key, UTF_8);
+      } catch (UnsupportedEncodingException e) {
+        return key;
+      }
+    }
+
+    @Override
+    public String decode(String value) {
+      try {
+        return URLDecoder.decode(value, UTF_8);
+      } catch (UnsupportedEncodingException e) {
+        return value;
+      }
+    }
+  }
+  public static class EncoderDecoderFactory {
+    public static final URLEncoderDecoder URL_ENCODER_DECODER = new URLEncoderDecoder();
+  }
+
+  static {
+    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+    if (classLoader == null) {
+      classLoader = HiveConf.class.getClassLoader();
+    }
+
+    hiveDefaultURL = classLoader.getResource("hive-default.xml");
+
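A minimal usage sketch for the URLEncoderDecoder/EncoderDecoderFactory pair defined above (illustrative only, not part of the patch; the sample value is hypothetical):

  // Round-trip a value through the UTF-8 URL codec declared in HiveConf above.
  String raw = "hdfs://nn:8020/user/hive/warehouse/sales data";          // hypothetical value
  String encoded =
      HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.encode(raw);    // percent-encodes via URLEncoder
  String decoded =
      HiveConf.EncoderDecoderFactory.URL_ENCODER_DECODER.decode(encoded);
  assert raw.equals(decoded);  // encode/decode are symmetric; on an encoding error the input is returned unchanged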
+    // Look for hive-site.xml on the CLASSPATH and log its location if found.
+    hiveSiteURL = findConfigFile(classLoader, "hive-site.xml", true);
+    hivemetastoreSiteUrl = findConfigFile(classLoader, "hivemetastore-site.xml", false);
+    hiveServer2SiteUrl = findConfigFile(classLoader, "hiveserver2-site.xml", false);
+
+    for (ConfVars confVar : ConfVars.values()) {
+      vars.put(confVar.varname, confVar);
+    }
+
+    Set<String> llapDaemonConfVarsSetLocal = new LinkedHashSet<>();
+    populateLlapDaemonVarsSet(llapDaemonConfVarsSetLocal);
+    llapDaemonVarsSet = Collections.unmodifiableSet(llapDaemonConfVarsSetLocal);
+  }
+
+  private static URL findConfigFile(ClassLoader classLoader, String name, boolean doLog) {
+    URL result = classLoader.getResource(name);
+    if (result == null) {
+      String confPath = System.getenv("HIVE_CONF_DIR");
+      result = checkConfigFile(new File(confPath, name));
+      if (result == null) {
+        String homePath = System.getenv("HIVE_HOME");
+        String nameInConf = "conf" + File.pathSeparator + name;
+        result = checkConfigFile(new File(homePath, nameInConf));
+        if (result == null) {
+          URI jarUri = null;
+          try {
+            jarUri = HiveConf.class.getProtectionDomain().getCodeSource().getLocation().toURI();
+          } catch (Throwable e) {
+            if (l4j.isInfoEnabled()) {
+              l4j.info("Cannot get jar URI", e);
+            }
+            System.err.println("Cannot get jar URI: " + e.getMessage());
+          }
+          result = checkConfigFile(new File(new File(jarUri).getParentFile(), nameInConf));
+        }
+      }
+    }
+    if (doLog && l4j.isInfoEnabled()) {
+      l4j.info("Found configuration file " + result);
+    }
+    return result;
+  }
+
+  private static URL checkConfigFile(File f) {
+    try {
+      return (f.exists() && f.isFile()) ? f.toURI().toURL() : null;
+    } catch (Throwable e) {
+      if (l4j.isInfoEnabled()) {
+        l4j.info("Error looking for config " + f, e);
+      }
+      System.err.println("Error looking for config " + f + ": " + e.getMessage());
+      return null;
+    }
+  }
+
+
+
+
+  @InterfaceAudience.Private
+  public static final String PREFIX_LLAP = "llap.";
+  @InterfaceAudience.Private
+  public static final String PREFIX_HIVE_LLAP = "hive.llap.";
+
+  /**
+   * Metastore related options that the db is initialized against. When a conf
+   * var in this list is changed, the metastore instance for the CLI will
+   * be recreated so that the change will take effect.
+ */ + public static final HiveConf.ConfVars[] metaVars = { + HiveConf.ConfVars.METASTOREWAREHOUSE, + HiveConf.ConfVars.REPLDIR, + HiveConf.ConfVars.METASTOREURIS, + HiveConf.ConfVars.METASTORE_SERVER_PORT, + HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, + HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, + HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY, + HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, + HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME, + HiveConf.ConfVars.METASTOREPWD, + HiveConf.ConfVars.METASTORECONNECTURLHOOK, + HiveConf.ConfVars.METASTORECONNECTURLKEY, + HiveConf.ConfVars.METASTORESERVERMINTHREADS, + HiveConf.ConfVars.METASTORESERVERMAXTHREADS, + HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE, + HiveConf.ConfVars.METASTORE_INT_ORIGINAL, + HiveConf.ConfVars.METASTORE_INT_ARCHIVED, + HiveConf.ConfVars.METASTORE_INT_EXTRACTED, + HiveConf.ConfVars.METASTORE_KERBEROS_KEYTAB_FILE, + HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, + HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, + HiveConf.ConfVars.METASTORE_TOKEN_SIGNATURE, + HiveConf.ConfVars.METASTORE_CACHE_PINOBJTYPES, + HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE, + HiveConf.ConfVars.METASTORE_VALIDATE_TABLES, + HiveConf.ConfVars.METASTORE_DATANUCLEUS_INIT_COL_INFO, + HiveConf.ConfVars.METASTORE_VALIDATE_COLUMNS, + HiveConf.ConfVars.METASTORE_VALIDATE_CONSTRAINTS, + HiveConf.ConfVars.METASTORE_STORE_MANAGER_TYPE, + HiveConf.ConfVars.METASTORE_AUTO_CREATE_ALL, + HiveConf.ConfVars.METASTORE_TRANSACTION_ISOLATION, + HiveConf.ConfVars.METASTORE_CACHE_LEVEL2, + HiveConf.ConfVars.METASTORE_CACHE_LEVEL2_TYPE, + HiveConf.ConfVars.METASTORE_IDENTIFIER_FACTORY, + HiveConf.ConfVars.METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK, + HiveConf.ConfVars.METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS, + HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX, + HiveConf.ConfVars.METASTORE_EVENT_LISTENERS, + HiveConf.ConfVars.METASTORE_TRANSACTIONAL_EVENT_LISTENERS, + HiveConf.ConfVars.METASTORE_EVENT_CLEAN_FREQ, + HiveConf.ConfVars.METASTORE_EVENT_EXPIRY_DURATION, + HiveConf.ConfVars.METASTORE_EVENT_MESSAGE_FACTORY, + HiveConf.ConfVars.METASTORE_FILTER_HOOK, + HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL, + HiveConf.ConfVars.METASTORE_END_FUNCTION_LISTENERS, + HiveConf.ConfVars.METASTORE_PART_INHERIT_TBL_PROPS, + HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX, + HiveConf.ConfVars.METASTORE_INIT_HOOKS, + HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS, + HiveConf.ConfVars.HMSHANDLERATTEMPTS, + HiveConf.ConfVars.HMSHANDLERINTERVAL, + HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF, + HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN, + HiveConf.ConfVars.METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS, + HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES, + HiveConf.ConfVars.USERS_IN_ADMIN_ROLE, + HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + HiveConf.ConfVars.HIVE_TXN_MANAGER, + HiveConf.ConfVars.HIVE_TXN_TIMEOUT, + HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES, + HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE, + HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, + HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER, + HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_FPP, + 
HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_TTL, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL, + HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL, + HiveConf.ConfVars.METASTORE_FASTPATH, + HiveConf.ConfVars.METASTORE_HBASE_CATALOG_CACHE_SIZE, + HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE, + HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS, + HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY, + HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE, + HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, + HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT, + HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT, + HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_FULL, + HiveConf.ConfVars.METASTORE_HBASE_CACHE_CLEAN_UNTIL, + HiveConf.ConfVars.METASTORE_HBASE_CONNECTION_CLASS, + HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES, + HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_MEMORY_TTL, + HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY, + HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL, + HiveConf.ConfVars.METASTORE_HBASE_FILE_METADATA_THREADS + }; + + /** + * User configurable Metastore vars + */ + public static final HiveConf.ConfVars[] metaConfVars = { + HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL, + HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL_DDL, + HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, + HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN, + HiveConf.ConfVars.METASTORE_CAPABILITY_CHECK + }; + + static { + for (ConfVars confVar : metaConfVars) { + metaConfs.put(confVar.varname, confVar); + } + } + + public static final String HIVE_LLAP_DAEMON_SERVICE_PRINCIPAL_NAME = "hive.llap.daemon.service.principal"; + public static final String HIVE_SERVER2_AUTHENTICATION_LDAP_USERMEMBERSHIPKEY_NAME = + "hive.server2.authentication.ldap.userMembershipKey"; + + /** + * dbVars are the parameters can be set per database. If these + * parameters are set as a database property, when switching to that + * database, the HiveConf variable will be changed. The change of these + * parameters will effectively change the DFS and MapReduce clusters + * for different databases. + */ + public static final HiveConf.ConfVars[] dbVars = { + HiveConf.ConfVars.HADOOPBIN, + HiveConf.ConfVars.METASTOREWAREHOUSE, + HiveConf.ConfVars.SCRATCHDIR + }; + + /** + * Variables used by LLAP daemons. + * TODO: Eventually auto-populate this based on prefixes. The conf variables + * will need to be renamed for this. 
+ */ + private static final Set llapDaemonVarsSet; + + private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal) { + llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_ENABLED.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_MEMORY_MODE.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_MIN_ALLOC.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_MAX_ALLOC.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_ARENA_COUNT.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_DIRECT.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_USE_LRFU.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_LRFU_LAMBDA.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_USE_FILEID_PATH.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_DECODING_METRICS_PERCENTILE_INTERVALS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ORC_ENABLE_TIME_COUNTERS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_KERBEROS_PRINCIPAL.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_KERBEROS_KEYTAB_FILE.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_KERBEROS_PRINCIPAL.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_KERBEROS_KEYTAB_FILE.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_ZK_CONNECTION_STRING.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_SECURITY_ACL.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_ACL.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_SECURITY_ACL_DENY.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_ACL_DENY.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DELEGATION_TOKEN_LIFETIME.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_RPC_PORT.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_WEB_AUTO_AUTH.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_RPC_NUM_HANDLERS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WORK_DIRS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_YARN_SHUFFLE_PORT.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SHUFFLE_DIR_WATCHER_ENABLED.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_HEARTBEAT_INTERVAL_MS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_CONNECTION_TIMEOUT_MS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_CONNECTION_SLEEP_BETWEEN_RETRIES_MS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_RPC_PORT.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_XMX_HEADROOM.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_FILE_CLEANER_THREADS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_FILE_CLEANUP_DELAY_SECONDS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SERVICE_REFRESH_INTERVAL.varname); + llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOW_PERMANENT_FNS.varname); + 
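As a hedged illustration (not part of the patch) of how the daemon-var set built above is meant to be consumed, via the getLlapDaemonConfVars() accessor defined just below; the surrounding property-change handling is hypothetical:

  // Check whether a changed property is daemon-level LLAP configuration rather
  // than something that can be overridden per query; membership is tested
  // against the unmodifiable set populated above.
  String changedKey = HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname;
  if (HiveConf.getLlapDaemonConfVars().contains(changedKey)) {
    System.out.println(changedKey + " is configuration read by LLAP daemons");
  }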
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_DOWNLOAD_PERMANENT_FNS.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_SCHEDULER_WAIT_QUEUE_SIZE.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WAIT_QUEUE_COMPARATOR_CLASS_NAME.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_SCHEDULER_ENABLE_PREEMPTION.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_PREEMPTION_METRICS_INTERVALS.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WEB_PORT.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WEB_SSL.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_CONTAINER_ID.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_VALIDATE_ACLS.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_LOGGER.varname);
+    llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_USE_FQDN.varname);
+  }
+
+  /**
+   * Get a set containing configuration parameter names used by LLAP Server instances
+   * @return an unmodifiable set containing llap ConfVars
+   */
+  public static final Set<String> getLlapDaemonConfVars() {
+    return llapDaemonVarsSet;
+  }
+
+
+  /**
+   * ConfVars.
+   *
+   * These are the default configuration properties for Hive. Each HiveConf
+   * object is initialized as follows:
+   *
+   * 1) Hadoop configuration properties are applied.
+   * 2) ConfVar properties with non-null values are overlayed.
+   * 3) hive-site.xml properties are overlayed.
+   *
+   * WARNING: think twice before adding any Hadoop configuration properties
+   * with non-null values to this list as they will override any values defined
+   * in the underlying Hadoop configuration.
+   */
+  public static enum ConfVars {
+    // QL execution stuff
+    SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""),
+    PLAN("hive.exec.plan", "", ""),
+    STAGINGDIR("hive.exec.stagingdir", ".hive-staging",
+        "Directory name that will be created inside table locations in order to support HDFS encryption. " +
+        "This replaces ${hive.exec.scratchdir} for query results with the exception of read-only tables. " +
+        "In all cases ${hive.exec.scratchdir} is still used for other temporary files, such as job plans."),
+    SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive",
+        "HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. 
" + + "For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/ is created, " + + "with ${hive.scratch.dir.permission}."), + REPLDIR("hive.repl.rootdir","/user/hive/repl/", + "HDFS root dir for all replication dumps."), + REPLCMENABLED("hive.repl.cm.enabled", false, + "Turn on ChangeManager, so delete files will go to cmrootdir."), + REPLCMDIR("hive.repl.cmrootdir","/user/hive/cmroot/", + "Root dir for ChangeManager, used for deleted files."), + REPLCMRETIAN("hive.repl.cm.retain","24h", + new TimeValidator(TimeUnit.HOURS), + "Time to retain removed files in cmrootdir."), + REPLCMINTERVAL("hive.repl.cm.interval","3600s", + new TimeValidator(TimeUnit.SECONDS), + "Inteval for cmroot cleanup thread."), + REPL_FUNCTIONS_ROOT_DIR("hive.repl.replica.functions.root.dir","/user/hive/repl/functions/", + "Root directory on the replica warehouse where the repl sub-system will store jars from the primary warehouse"), + LOCALSCRATCHDIR("hive.exec.local.scratchdir", + "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", + "Local scratch space for Hive jobs"), + DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir", + "${system:java.io.tmpdir}" + File.separator + "${hive.session.id}_resources", + "Temporary local directory for added resources in the remote file system."), + SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700", + "The permission for the user specific scratch directories that get created."), + SUBMITVIACHILD("hive.exec.submitviachild", false, ""), + SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true, + "Determines whether local tasks (typically mapjoin hashtable generation phase) runs in \n" + + "separate JVM (true recommended) or not. \n" + + "Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues."), + SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000, + "Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task). \n" + + "This prevents runaway scripts from filling logs partitions to capacity"), + ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false, + "When enabled, this option allows a user script to exit successfully without consuming \n" + + "all the data from the standard input."), + STREAMREPORTERPERFIX("stream.stderr.reporter.prefix", "reporter:", + "Streaming jobs that log to standard error with this prefix can log counter or status information."), + STREAMREPORTERENABLED("stream.stderr.reporter.enabled", true, + "Enable consumption of status and counter messages for streaming jobs."), + COMPRESSRESULT("hive.exec.compress.output", false, + "This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. \n" + + "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), + COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false, + "This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. 
\n" + + "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), + COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", "", ""), + COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", "", ""), + BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (256 * 1000 * 1000), + "size per reducer.The default is 256Mb, i.e if the input size is 1G, it will use 4 reducers."), + MAXREDUCERS("hive.exec.reducers.max", 1009, + "max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is\n" + + "negative, Hive will use this one as the max number of reducers when automatically determine number of reducers."), + PREEXECHOOKS("hive.exec.pre.hooks", "", + "Comma-separated list of pre-execution hooks to be invoked for each statement. \n" + + "A pre-execution hook is specified as the name of a Java class which implements the \n" + + "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), + POSTEXECHOOKS("hive.exec.post.hooks", "", + "Comma-separated list of post-execution hooks to be invoked for each statement. \n" + + "A post-execution hook is specified as the name of a Java class which implements the \n" + + "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), + ONFAILUREHOOKS("hive.exec.failure.hooks", "", + "Comma-separated list of on-failure hooks to be invoked for each statement. \n" + + "An on-failure hook is specified as the name of Java class which implements the \n" + + "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), + QUERYREDACTORHOOKS("hive.exec.query.redactor.hooks", "", + "Comma-separated list of hooks to be invoked for each query which can \n" + + "tranform the query before it's placed in the job.xml file. Must be a Java class which \n" + + "extends from the org.apache.hadoop.hive.ql.hooks.Redactor abstract class."), + CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", "", + "Comma-separated list of statistics publishers to be invoked on counters on each job. \n" + + "A client stats publisher is specified as the name of a Java class which implements the \n" + + "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."), + ATSHOOKQUEUECAPACITY("hive.ats.hook.queue.capacity", 64, + "Queue size for the ATS Hook executor. If the number of outstanding submissions \n" + + "to the ATS executor exceed this amount, the Hive ATS Hook will not try to log queries to ATS."), + EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in parallel"), + EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8, + "How many jobs at most can be executed in parallel"), + HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true, + "Whether speculative execution for reducers should be turned on. "), + HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L, + "The interval with which to poll the JobTracker for the counters the running job. 
\n" + + "The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be."), + DYNAMICPARTITIONING("hive.exec.dynamic.partition", true, + "Whether or not to allow dynamic partitions in DML/DDL."), + DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict", + "In strict mode, the user must specify at least one static partition\n" + + "in case the user accidentally overwrites all partitions.\n" + + "In nonstrict mode all partitions are allowed to be dynamic."), + DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000, + "Maximum number of dynamic partitions allowed to be created in total."), + DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100, + "Maximum number of dynamic partitions allowed to be created in each mapper/reducer node."), + MAXCREATEDFILES("hive.exec.max.created.files", 100000L, + "Maximum number of HDFS files created by all mappers/reducers in a MapReduce job."), + DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__", + "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" + + "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" + + "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."), + DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__", ""), + + // Whether to show a link to the most failed task + debugging tips + SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true, + "If a job fails, whether to provide a link in the CLI to the task with the\n" + + "most failures, along with debugging hints if applicable."), + JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true, + "Whether or not stack traces parsed from the task logs of a sampled failed task \n" + + "for each failed job should be stored in the SessionState"), + JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000, ""), + TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000, ""), + OUTPUT_FILE_EXTENSION("hive.output.file.extension", null, + "String used as a file extension for output files. \n" + + "If not set, defaults to the codec extension for text files (e.g. \".gz\"), or no extension otherwise."), + + HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true), + HIVE_IN_TEST_SHORT_LOGS("hive.in.test.short.logs", false, + "internal usage only, used only in test mode. If set true, when requesting the " + + "operation logs the short version (generated by LogDivertAppenderForTest) will be " + + "returned"), + HIVE_IN_TEST_REMOVE_LOGS("hive.in.test.remove.logs", true, + "internal usage only, used only in test mode. 
If set false, the operation logs, and the " + + "operation log directory will not be removed, so they can be found after the test runs."), + + HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez", + true), + + LOCALMODEAUTO("hive.exec.mode.local.auto", false, + "Let Hive determine whether to run in local mode automatically"), + LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L, + "When hive.exec.mode.local.auto is true, input bytes should less than this for local mode."), + LOCALMODEMAXINPUTFILES("hive.exec.mode.local.auto.input.files.max", 4, + "When hive.exec.mode.local.auto is true, the number of tasks should less than this for local mode."), + + DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true, + "Do not report an error if DROP TABLE/VIEW/Index/Function specifies a non-existent table/view/index/function"), + + HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true, "Ignore the mapjoin hint"), + + HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100, + "maximum number of lines for footer user can define for a table file"), + + HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES("hive.resultset.use.unique.column.names", true, + "Make column names unique in the result set by qualifying column names with table alias if needed.\n" + + "Table alias will be added to column names for queries of type \"select *\" or \n" + + "if query explicitly uses table alias \"select r1.x..\"."), + + // Hadoop Configuration Properties + // Properties with null values are ignored and exist only for the purpose of giving us + // a symbolic name to reference in the Hive source code. Properties with non-null + // values will override any values set in the underlying Hadoop configuration. + HADOOPBIN("hadoop.bin.path", findHadoopBinary(), "", true), + YARNBIN("yarn.bin.path", findYarnBinary(), "", true), + HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem", + "The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop versions less than 0.20"), + MAPREDMAXSPLITSIZE(FileInputFormat.SPLIT_MAXSIZE, 256000000L, "", true), + MAPREDMINSPLITSIZE(FileInputFormat.SPLIT_MINSIZE, 1L, "", true), + MAPREDMINSPLITSIZEPERNODE(CombineFileInputFormat.SPLIT_MINSIZE_PERNODE, 1L, "", true), + MAPREDMINSPLITSIZEPERRACK(CombineFileInputFormat.SPLIT_MINSIZE_PERRACK, 1L, "", true), + // The number of reduce tasks per job. Hadoop sets this value to 1 by default + // By setting this property to -1, Hive will automatically determine the correct + // number of reducers. + HADOOPNUMREDUCERS("mapreduce.job.reduces", -1, "", true), + + // Metastore stuff. Be sure to update HiveConf.metaVars when you add something here! + METASTOREDBTYPE("hive.metastore.db.type", "DERBY", new StringSet("DERBY", "ORACLE", "MYSQL", "MSSQL", "POSTGRES"), + "Type of database used by the metastore. Information schema & JDBCStorageHandler depend on it."), + METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse", + "location of default database for the warehouse"), + METASTOREURIS("hive.metastore.uris", "", + "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."), + + METASTORE_CAPABILITY_CHECK("hive.metastore.client.capability.check", true, + "Whether to check client capabilities for potentially breaking API usage."), + METASTORE_FASTPATH("hive.metastore.fastpath", false, + "Used to avoid all of the proxies and object copies in the metastore. 
Note, if this is " + + "set, you MUST use a local metastore (hive.metastore.uris must be empty) otherwise " + + "undefined and most likely undesired behavior will result"), + METASTORE_FS_HANDLER_THREADS_COUNT("hive.metastore.fshandler.threads", 15, + "Number of threads to be allocated for metastore handler for fs operations."), + METASTORE_HBASE_CATALOG_CACHE_SIZE("hive.metastore.hbase.catalog.cache.size", 50000, "Maximum number of " + + "objects we will place in the hbase metastore catalog cache. The objects will be divided up by " + + "types that we need to cache."), + METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.hbase.aggregate.stats.cache.size", 10000, + "Maximum number of aggregate stats nodes that we will place in the hbase metastore aggregate stats cache."), + METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.hbase.aggregate.stats.max.partitions", 10000, + "Maximum number of partitions that are aggregated per cache node."), + METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY("hive.metastore.hbase.aggregate.stats.false.positive.probability", + (float) 0.01, "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."), + METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.hbase.aggregate.stats.max.variance", (float) 0.1, + "Maximum tolerable variance in number of partitions between a cached node and our request (default 10%)."), + METASTORE_HBASE_CACHE_TIME_TO_LIVE("hive.metastore.hbase.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS), + "Number of seconds for a cached node to be active in the cache before they become stale."), + METASTORE_HBASE_CACHE_MAX_WRITER_WAIT("hive.metastore.hbase.cache.max.writer.wait", "5000ms", new TimeValidator(TimeUnit.MILLISECONDS), + "Number of milliseconds a writer will wait to acquire the writelock before giving up."), + METASTORE_HBASE_CACHE_MAX_READER_WAIT("hive.metastore.hbase.cache.max.reader.wait", "1000ms", new TimeValidator(TimeUnit.MILLISECONDS), + "Number of milliseconds a reader will wait to acquire the readlock before giving up."), + METASTORE_HBASE_CACHE_MAX_FULL("hive.metastore.hbase.cache.max.full", (float) 0.9, + "Maximum cache full % after which the cache cleaner thread kicks in."), + METASTORE_HBASE_CACHE_CLEAN_UNTIL("hive.metastore.hbase.cache.clean.until", (float) 0.8, + "The cleaner thread cleans until cache reaches this % full size."), + METASTORE_HBASE_CONNECTION_CLASS("hive.metastore.hbase.connection.class", + "org.apache.hadoop.hive.metastore.hbase.VanillaHBaseConnection", + "Class used to connection to HBase"), + METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES("hive.metastore.hbase.aggr.stats.cache.entries", + 10000, "How many in stats objects to cache in memory"), + METASTORE_HBASE_AGGR_STATS_MEMORY_TTL("hive.metastore.hbase.aggr.stats.memory.ttl", "60s", + new TimeValidator(TimeUnit.SECONDS), + "Number of seconds stats objects live in memory after they are read from HBase."), + METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY( + "hive.metastore.hbase.aggr.stats.invalidator.frequency", "5s", + new TimeValidator(TimeUnit.SECONDS), + "How often the stats cache scans its HBase entries and looks for expired entries"), + METASTORE_HBASE_AGGR_STATS_HBASE_TTL("hive.metastore.hbase.aggr.stats.hbase.ttl", "604800s", + new TimeValidator(TimeUnit.SECONDS), + "Number of seconds stats entries live in HBase cache after they are created. They may be" + + " invalided by updates or partition drops before this. 
Default is one week."), + METASTORE_HBASE_FILE_METADATA_THREADS("hive.metastore.hbase.file.metadata.threads", 1, + "Number of threads to use to read file metadata in background to cache it."), + + METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3, + "Number of retries while opening a connection to metastore"), + METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1, + "Number of retries upon failure of Thrift metastore calls"), + METASTORE_SERVER_PORT("hive.metastore.port", 9083, "Hive metastore listener port"), + METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", "1s", + new TimeValidator(TimeUnit.SECONDS), + "Number of seconds for the client to wait between consecutive connection attempts"), + METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", "600s", + new TimeValidator(TimeUnit.SECONDS), + "MetaStore Client socket timeout in seconds"), + METASTORE_CLIENT_SOCKET_LIFETIME("hive.metastore.client.socket.lifetime", "0s", + new TimeValidator(TimeUnit.SECONDS), + "MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" + + "reconnects on the next MetaStore operation. A value of 0s means the connection\n" + + "has an infinite lifetime."), + METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine", + "password to use against metastore database"), + METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", "", + "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"), + METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true, + "Set this to true if multiple threads access metastore through JDO concurrently."), + METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL", + "jdbc:derby:;databaseName=metastore_db;create=true", + "JDBC connect string for a JDBC metastore.\n" + + "To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.\n" + + "For example, jdbc:postgresql://myhost/db?ssl=true for postgres database."), + METASTORE_DBACCESS_SSL_PROPS("hive.metastore.dbaccess.ssl.properties", "", + "Comma-separated SSL properties for metastore to access database when JDO connection URL\n" + + "enables SSL access. e.g. javax.net.ssl.trustStore=/tmp/truststore,javax.net.ssl.trustStorePassword=pwd."), + HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 10, + "The number of times to retry a HMSHandler call if there were a connection error."), + HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", "2000ms", + new TimeValidator(TimeUnit.MILLISECONDS), "The time between HMSHandler retry attempts on failure."), + HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false, + "Whether to force reloading of the HMSHandler configuration (including\n" + + "the connection URL, before the next metastore query that accesses the\n" + + "datastore. Once reloaded, this value is reset to false. 
Used for\n" + + "testing only."), + METASTORESERVERMAXMESSAGESIZE("hive.metastore.server.max.message.size", 100*1024*1024L, + "Maximum message size in bytes a HMS will accept."), + METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200, + "Minimum number of worker threads in the Thrift server's pool."), + METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 1000, + "Maximum number of worker threads in the Thrift server's pool."), + METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true, + "Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."), + + METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original", + "_INTERMEDIATE_ORIGINAL", + "Intermediate dir suffixes used for archiving. Not important what they\n" + + "are, as long as collisions are avoided"), + METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived", + "_INTERMEDIATE_ARCHIVED", ""), + METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted", + "_INTERMEDIATE_EXTRACTED", ""), + METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", "", + "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."), + METASTORE_KERBEROS_PRINCIPAL("hive.metastore.kerberos.principal", + "hive-metastore/_HOST@EXAMPLE.COM", + "The service principal for the metastore Thrift server. \n" + + "The special string _HOST will be replaced automatically with the correct host name."), + METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false, + "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."), + METASTORE_USE_THRIFT_FRAMED_TRANSPORT("hive.metastore.thrift.framed.transport.enabled", false, + "If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."), + METASTORE_USE_THRIFT_COMPACT_PROTOCOL("hive.metastore.thrift.compact.protocol.enabled", false, + "If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" + + "Setting it to true will break compatibility with older clients running TBinaryProtocol."), + METASTORE_TOKEN_SIGNATURE("hive.metastore.token.signature", "", + "The delegation token service name to match when selecting a token from the current user's tokens."), + METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS("hive.cluster.delegation.token.store.class", + "org.apache.hadoop.hive.thrift.MemoryTokenStore", + "The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster."), + METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_CONNECTSTR( + "hive.cluster.delegation.token.store.zookeeper.connectString", "", + "The ZooKeeper token store connect string. You can re-use the configuration value\n" + + "set in hive.zookeeper.quorum, by leaving this parameter unset."), + METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ZNODE( + "hive.cluster.delegation.token.store.zookeeper.znode", "/hivedelegation", + "The root path for token store data. Note that this is used by both HiveServer2 and\n" + + "MetaStore to store delegation Token. One directory gets created for each of them.\n" + + "The final directory names would have the servername appended to it (HIVESERVER2,\n" + + "METASTORE)."), + METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ACL( + "hive.cluster.delegation.token.store.zookeeper.acl", "", + "ACL for token store entries. 
Comma separated list of ACL entries. For example:\n" + + "sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa\n" + + "Defaults to all permissions for the hiveserver2/metastore process user."), + METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order", + "List of comma separated metastore object types that should be pinned in the cache"), + METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "HikariCP", new StringSet("BONECP", "DBCP", + "HikariCP", "NONE"), + "Specify connection pool library for datanucleus"), + METASTORE_CONNECTION_POOLING_MAX_CONNECTIONS("datanucleus.connectionPool.maxPoolSize", 10, + "Specify the maximum number of connections in the connection pool. Note: The configured size will be used by\n" + + "2 connection pools (TxnHandler and ObjectStore). When configuring the max connection pool size, it is\n" + + "recommended to take into account the number of metastore instances and the number of HiveServer2 instances\n" + + "configured with embedded metastore. To get optimal performance, set config to meet the following condition\n"+ + "(2 * pool_size * metastore_instances + 2 * pool_size * HS2_instances_with_embedded_metastore) = \n" + + "(2 * physical_core_count + hard_disk_count)."), + // Workaround for DN bug on Postgres: + // http://www.datanucleus.org/servlet/forum/viewthread_thread,7985_offset + METASTORE_DATANUCLEUS_INIT_COL_INFO("datanucleus.rdbms.initializeColumnInfo", "NONE", + "initializeColumnInfo setting for DataNucleus; set to NONE at least on Postgres."), + METASTORE_VALIDATE_TABLES("datanucleus.schema.validateTables", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + METASTORE_VALIDATE_COLUMNS("datanucleus.schema.validateColumns", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + METASTORE_VALIDATE_CONSTRAINTS("datanucleus.schema.validateConstraints", false, + "validates existing schema against code. turn this on if you want to verify existing schema"), + METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms", "metadata store type"), + METASTORE_AUTO_CREATE_ALL("datanucleus.schema.autoCreateAll", false, + "Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once." + + "To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not " + + "recommended for production use cases, run schematool command instead." ), + METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", true, + "Enforce metastore schema version consistency.\n" + + "True: Verify that version information stored in is compatible with one from Hive jars. Also disable automatic\n" + + " schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures\n" + + " proper metastore schema migration. (Default)\n" + + "False: Warn if the version information stored in metastore doesn't match with one from in Hive jars."), + METASTORE_SCHEMA_VERIFICATION_RECORD_VERSION("hive.metastore.schema.verification.record.version", false, + "When true the current MS version is recorded in the VERSION table. 
If this is disabled and verification is\n" + + " enabled the MS will be unusable."), + METASTORE_SCHEMA_INFO_CLASS("hive.metastore.schema.info.class", + "org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo", + "Fully qualified class name for the metastore schema information class \n" + + "which is used by schematool to fetch the schema information.\n" + + " This class should implement the IMetaStoreSchemaInfo interface"), + METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed", + "Default transaction isolation level for identity generation."), + METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false, + "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"), + METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none", ""), + METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus1", + "Name of the identifier factory to use when generating table/column names etc. \n" + + "'datanucleus1' is used for backward compatibility with DataNucleus v1"), + METASTORE_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""), + METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG", + "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"), + METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300, + "Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. \n" + + "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" + + "but it may also cause higher memory requirement at the client side."), + METASTORE_BATCH_RETRIEVE_OBJECTS_MAX( + "hive.metastore.batch.retrieve.table.partition.max", 1000, + "Maximum number of objects that metastore internally retrieves in one batch."), + + METASTORE_INIT_HOOKS("hive.metastore.init.hooks", "", + "A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" + + "An init hook is specified as the name of Java class which extends org.apache.hadoop.hive.metastore.MetaStoreInitListener."), + METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", "", + "List of comma separated listeners for metastore events."), + METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", "", + "A comma separated list of Java classes that implement the org.apache.hadoop.hive.metastore.MetaStoreEventListener" + + " interface. The metastore event and corresponding listener method will be invoked in separate JDO transactions. " + + "Alternatively, configure hive.metastore.transactional.event.listeners to ensure both are invoked in same JDO transaction."), + METASTORE_TRANSACTIONAL_EVENT_LISTENERS("hive.metastore.transactional.event.listeners", "", + "A comma separated list of Java classes that implement the org.apache.hadoop.hive.metastore.MetaStoreEventListener" + + " interface. 
Both the metastore event and corresponding listener method will be invoked in the same JDO transaction."), + METASTORE_EVENT_DB_LISTENER_TTL("hive.metastore.event.db.listener.timetolive", "86400s", + new TimeValidator(TimeUnit.SECONDS), + "time after which events will be removed from the database listener queue"), + METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS("hive.metastore.authorization.storage.checks", false, + "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" + + "for operations like drop-partition (disallow the drop-partition if the user in\n" + + "question doesn't have permissions to delete the corresponding directory\n" + + "on the storage)."), + METASTORE_AUTHORIZATION_EXTERNALTABLE_DROP_CHECK("hive.metastore.authorization.storage.check.externaltable.drop", true, + "Should StorageBasedAuthorization check permission of the storage before dropping external table.\n" + + "StorageBasedAuthorization already does this check for managed table. For external table however,\n" + + "anyone who has read permission of the directory could drop external table, which is surprising.\n" + + "The flag is set to false by default to maintain backward compatibility."), + METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq", "0s", + new TimeValidator(TimeUnit.SECONDS), + "Frequency at which timer task runs to purge expired events in metastore."), + METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration", "0s", + new TimeValidator(TimeUnit.SECONDS), + "Duration after which events expire from events table"), + METASTORE_EVENT_MESSAGE_FACTORY("hive.metastore.event.message.factory", + "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory", + "Factory class for making encoding and decoding messages in the events generated."), + METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", true, + "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" + + "the client's reported user and group permissions. Note that this property must be set on \n" + + "both the client and server sides. Further note that its best effort. \n" + + "If client sets its to true and server sets it to false, client setting will be ignored."), + METASTORE_PARTITION_NAME_WHITELIST_PATTERN("hive.metastore.partition.name.whitelist.pattern", "", + "Partition names will be checked against this regex pattern and rejected if not matched."), + + METASTORE_INTEGER_JDO_PUSHDOWN("hive.metastore.integral.jdo.pushdown", false, + "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" + + "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" + + "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" + + "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" + + "is also irrelevant."), + METASTORE_TRY_DIRECT_SQL("hive.metastore.try.direct.sql", true, + "Whether the Hive metastore should try to use direct SQL queries instead of the\n" + + "DataNucleus for certain read paths. This can improve metastore performance when\n" + + "fetching many partitions or column statistics by orders of magnitude; however, it\n" + + "is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" + + "the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" + + "work for all queries on your datastore. 
If all SQL queries fail (for example, your\n" + + "metastore is backed by MongoDB), you might want to disable this to save the\n" + + "try-and-fall-back cost."), + METASTORE_DIRECT_SQL_PARTITION_BATCH_SIZE("hive.metastore.direct.sql.batch.size", 0, + "Batch size for partition and other object retrieval from the underlying DB in direct\n" + + "SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" + + "that necessitate this. For DBs that can handle the queries, this isn't necessary and\n" + + "may impede performance. -1 means no batching, 0 means automatic batching."), + METASTORE_TRY_DIRECT_SQL_DDL("hive.metastore.try.direct.sql.ddl", true, + "Same as hive.metastore.try.direct.sql, for read statements within a transaction that\n" + + "modifies metastore data. Due to non-standard behavior in Postgres, if a direct SQL\n" + + "select query has incorrect syntax or something similar inside a transaction, the\n" + + "entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" + + "should disable the usage of direct SQL inside transactions if that happens in your case."), + METASTORE_DIRECT_SQL_MAX_QUERY_LENGTH("hive.direct.sql.max.query.length", 100, "The maximum\n" + + " size of a query string (in KB)."), + METASTORE_DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE("hive.direct.sql.max.elements.in.clause", 1000, + "The maximum number of values in a IN clause. Once exceeded, it will be broken into\n" + + " multiple OR separated IN clauses."), + METASTORE_DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("hive.direct.sql.max.elements.values.clause", + 1000, "The maximum number of values in a VALUES clause for INSERT statement."), + METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false, + "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " + + "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " + + "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " + + "pruning is the correct behaviour"), + METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES( + "hive.metastore.disallow.incompatible.col.type.changes", true, + "If true (default is false), ALTER TABLE operations which change the type of a\n" + + "column (say STRING) to an incompatible type (say MAP) are disallowed.\n" + + "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" + + "datatypes can be converted from string to any type. The map is also serialized as\n" + + "a string, which can be read as a string as well. However, with any binary\n" + + "serialization, this is not true. 
Blocking the ALTER TABLE prevents ClassCastExceptions\n" + + "when subsequently trying to access old partitions.\n" + + "\n" + + "Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" + + "not blocked.\n" + + "\n" + + "See HIVE-4409 for more details."), + METASTORE_LIMIT_PARTITION_REQUEST("hive.metastore.limit.partition.request", -1, + "This limits the number of partitions that can be requested from the metastore for a given table.\n" + + "The default value \"-1\" means no limit."), + + NEWTABLEDEFAULTPARA("hive.table.parameters.default", "", + "Default property values for newly created tables"), + DDL_CTL_PARAMETERS_WHITELIST("hive.ddl.createtablelike.properties.whitelist", "", + "Table Properties to copy over when executing a Create Table Like."), + METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore", + "Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. \n" + + "This class is used to store and retrieval of raw metadata objects such as table, database"), + METASTORE_CACHED_RAW_STORE_IMPL("hive.metastore.cached.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore", + "Name of the wrapped RawStore class"), + METASTORE_CACHED_RAW_STORE_CACHE_UPDATE_FREQUENCY( + "hive.metastore.cached.rawstore.cache.update.frequency", "60", new TimeValidator( + TimeUnit.SECONDS), + "The time after which metastore cache is updated from metastore DB."), + METASTORE_TXN_STORE_IMPL("hive.metastore.txn.store.impl", + "org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler", + "Name of class that implements org.apache.hadoop.hive.metastore.txn.TxnStore. This " + + "class is used to store and retrieve transactions and locks"), + METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver", + "Driver class name for a JDBC metastore"), + METASTORE_MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass", + "org.datanucleus.api.jdo.JDOPersistenceManagerFactory", + "class implementing the jdo persistence"), + METASTORE_EXPRESSION_PROXY_CLASS("hive.metastore.expression.proxy", + "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore", ""), + METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true, + "Detaches all objects from session so that they can be used after transaction is committed"), + METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true, + "Reads outside of transactions"), + METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP", + "Username to use against metastore database"), + METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", "", + "List of comma separated listeners for the end of metastore functions."), + METASTORE_PART_INHERIT_TBL_PROPS("hive.metastore.partition.inherit.table.properties", "", + "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" + + "* implies all the keys will get inherited."), + METASTORE_FILTER_HOOK("hive.metastore.filter.hook", "org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl", + "Metastore hook class for filtering the metadata read results. 
If hive.security.authorization.manager" + + "is set to instance of HiveAuthorizerFactory, then this value is ignored."), + FIRE_EVENTS_FOR_DML("hive.metastore.dml.events", false, "If true, the metastore will be asked" + + " to fire events for DML operations"), + METASTORE_CLIENT_DROP_PARTITIONS_WITH_EXPRESSIONS("hive.metastore.client.drop.partitions.using.expressions", true, + "Choose whether dropping partitions with HCatClient pushes the partition-predicate to the metastore, " + + "or drops partitions iteratively"), + + METASTORE_AGGREGATE_STATS_CACHE_ENABLED("hive.metastore.aggregate.stats.cache.enabled", true, + "Whether aggregate stats caching is enabled or not."), + METASTORE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.aggregate.stats.cache.size", 10000, + "Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."), + METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.aggregate.stats.cache.max.partitions", 10000, + "Maximum number of partitions that are aggregated per cache node."), + METASTORE_AGGREGATE_STATS_CACHE_FPP("hive.metastore.aggregate.stats.cache.fpp", (float) 0.01, + "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."), + METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.aggregate.stats.cache.max.variance", (float) 0.01, + "Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."), + METASTORE_AGGREGATE_STATS_CACHE_TTL("hive.metastore.aggregate.stats.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS), + "Number of seconds for a cached node to be active in the cache before they become stale."), + METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("hive.metastore.aggregate.stats.cache.max.writer.wait", "5000ms", + new TimeValidator(TimeUnit.MILLISECONDS), + "Number of milliseconds a writer will wait to acquire the writelock before giving up."), + METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT("hive.metastore.aggregate.stats.cache.max.reader.wait", "1000ms", + new TimeValidator(TimeUnit.MILLISECONDS), + "Number of milliseconds a reader will wait to acquire the readlock before giving up."), + METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL("hive.metastore.aggregate.stats.cache.max.full", (float) 0.9, + "Maximum cache full % after which the cache cleaner thread kicks in."), + METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL("hive.metastore.aggregate.stats.cache.clean.until", (float) 0.8, + "The cleaner thread cleans until cache reaches this % full size."), + METASTORE_METRICS("hive.metastore.metrics.enabled", false, "Enable metrics on the metastore."), + METASTORE_INIT_METADATA_COUNT_ENABLED("hive.metastore.initial.metadata.count.enabled", true, + "Enable a metadata count at metastore startup for metrics."), + + // Metastore SSL settings + HIVE_METASTORE_USE_SSL("hive.metastore.use.SSL", false, + "Set this to true for using SSL encryption in HMS server."), + HIVE_METASTORE_SSL_KEYSTORE_PATH("hive.metastore.keystore.path", "", + "Metastore SSL certificate keystore location."), + HIVE_METASTORE_SSL_KEYSTORE_PASSWORD("hive.metastore.keystore.password", "", + "Metastore SSL certificate keystore password."), + HIVE_METASTORE_SSL_TRUSTSTORE_PATH("hive.metastore.truststore.path", "", + "Metastore SSL certificate truststore location."), + HIVE_METASTORE_SSL_TRUSTSTORE_PASSWORD("hive.metastore.truststore.password", "", + "Metastore SSL certificate truststore password."), + + // Parameters for exporting metadata on table drop 
(requires the use of the) + // org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event listener + METADATA_EXPORT_LOCATION("hive.metadata.export.location", "", + "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event listener, \n" + + "it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" + + "metadata being exported to the current user's home directory on HDFS."), + MOVE_EXPORTED_METADATA_TO_TRASH("hive.metadata.move.exported.metadata.to.trash", true, + "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event listener, \n" + + "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" + + "alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."), + + // CLI + CLIIGNOREERRORS("hive.cli.errors.ignore", false, ""), + CLIPRINTCURRENTDB("hive.cli.print.current.db", false, + "Whether to include the current database in the Hive prompt."), + CLIPROMPT("hive.cli.prompt", "hive", + "Command line prompt configuration value. Other hiveconf variables can be used in this configuration value. \n" + + "Variable substitution will only be invoked at the Hive CLI startup."), + CLIPRETTYOUTPUTNUMCOLS("hive.cli.pretty.output.num.cols", -1, + "The number of columns to use when formatting output generated by the DESCRIBE PRETTY table_name command.\n" + + "If the value of this property is -1, then Hive will use the auto-detected terminal width."), + + HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""), + + // Things we log in the jobconf + + // session identifier + HIVESESSIONID("hive.session.id", "", ""), + // whether session is running in silent mode or not + HIVESESSIONSILENT("hive.session.silent", false, ""), + + HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false, + "Whether to log Hive query, query plan, runtime statistics etc."), + + HIVEQUERYSTRING("hive.query.string", "", + "Query being executed (might be multiple per session)"), + + HIVEQUERYID("hive.query.id", "", + "ID for query being executed (might be multiple per session)"), + + HIVEJOBNAMELENGTH("hive.jobname.length", 50, "max jobname length"), + + // hive jar + HIVEJAR("hive.jar.path", "", + "The location of hive_cli.jar that is used when submitting jobs in a separate jvm."), + HIVEAUXJARS("hive.aux.jars.path", "", + "The location of the plugin jars that contain implementations of user defined functions and serdes."), + + // reloadable jars + HIVERELOADABLEJARS("hive.reloadable.aux.jars.path", "", + "The locations of the plugin jars, which can be comma-separated folders or jars. Jars can be renewed\n" + + "by executing the reload command. These jars can be " + + "used as auxiliary classes, for example when creating a UDF or SerDe."), + + // hive added files and jars + HIVEADDEDFILES("hive.added.files.path", "", "This is an internal parameter."), + HIVEADDEDJARS("hive.added.jars.path", "", "This is an internal parameter."), + HIVEADDEDARCHIVES("hive.added.archives.path", "", "This is an internal parameter."), + + HIVE_CURRENT_DATABASE("hive.current.database", "", "Database name used by current session.
Internal usage only.", true), + + // for hive script operator + HIVES_AUTO_PROGRESS_TIMEOUT("hive.auto.progress.timeout", "0s", + new TimeValidator(TimeUnit.SECONDS), + "How long to run the autoprogressor for the script/UDTF operators.\n" + + "Set to 0 for forever."), + HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false, + "Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker \n" + + "to avoid the task getting killed because of inactivity. Hive sends progress information when the script is \n" + + "outputting to stderr. This option removes the need to periodically produce stderr messages, \n" + + "but users should be cautious because this may prevent TaskTracker from killing tasks stuck in infinite loops."), + HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID", + "Name of the environment variable that holds the unique script operator ID in the user's \n" + + "transform function (the custom mapper/reducer that the user has specified in the query)"), + HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false, + "Truncate each environment variable for the external script in the script operator to 20KB (to fit system limits)"), + HIVESCRIPT_ENV_BLACKLIST("hive.script.operator.env.blacklist", + "hive.txn.valid.txns,hive.script.operator.env.blacklist", + "Comma separated list of keys from the configuration file not to convert to environment " + + "variables when invoking the script operator"), + HIVE_STRICT_CHECKS_LARGE_QUERY("hive.strict.checks.large.query", false, + "Enabling strict large query checks disallows the following:\n" + + " ORDER BY without LIMIT.\n" + + " No partition being picked up for a query against a partitioned table.\n" + + "Note that these checks currently do not consider data size, only the query pattern."), + HIVE_STRICT_CHECKS_TYPE_SAFETY("hive.strict.checks.type.safety", true, + "Enabling strict type safety checks disallows the following:\n" + + " Comparing bigints and strings.\n" + + " Comparing bigints and doubles."), + HIVE_STRICT_CHECKS_CARTESIAN("hive.strict.checks.cartesian.product", true, + "Enabling strict Cartesian join checks disallows the following:\n" + + " Cartesian product (cross join)."), + HIVE_STRICT_CHECKS_BUCKETING("hive.strict.checks.bucketing", true, + "Enabling strict bucketing checks disallows the following:\n" + + " Load into bucketed tables."), + + @Deprecated + HIVEMAPREDMODE("hive.mapred.mode", null, + "Deprecated; use hive.strict.checks.* settings instead."), + HIVEALIAS("hive.alias", "", ""), + HIVEMAPSIDEAGGREGATE("hive.map.aggr", true, "Whether to use map-side aggregation in Hive Group By queries"), + HIVEGROUPBYSKEW("hive.groupby.skewindata", false, "Whether there is skew in data to optimize group by queries"), + HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000, + "How many rows in the right-most join operand Hive should buffer before emitting the join result."), + HIVEJOINCACHESIZE("hive.join.cache.size", 25000, + "How many rows in the joining tables (except the streaming table) should be cached in memory."), + HIVE_PUSH_RESIDUAL_INNER("hive.join.inner.residual", false, + "Whether to push non-equi filter predicates within inner joins. This can improve efficiency in " + + "the evaluation of certain joins, since we will not be emitting rows which are thrown away by " + + "a Filter operator straight away.
However, currently vectorization does not support them, thus " + + "enabling it is only recommended when vectorization is disabled."), + + // CBO related + HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using the Calcite framework."), + HIVE_CBO_CNF_NODES_LIMIT("hive.cbo.cnf.maxnodes", -1, "When converting to conjunctive normal form (CNF), fail if " + + "the expression exceeds this threshold; the threshold is expressed in terms of the number of nodes (leaves and " + + "interior nodes). -1 to not set up a threshold."), + HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", false, "Flag to control Calcite plan to Hive operator conversion"), + HIVE_CBO_EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", false, "Flag to control enabling the extended cost model based on " + + "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."), + HIVE_CBO_COST_MODEL_CPU("hive.cbo.costmodel.cpu", "0.000001", "Default cost of a comparison"), + HIVE_CBO_COST_MODEL_NET("hive.cbo.costmodel.network", "150.0", "Default cost of transferring a byte over the network;" + + " expressed as a multiple of CPU cost"), + HIVE_CBO_COST_MODEL_LFS_WRITE("hive.cbo.costmodel.local.fs.write", "4.0", "Default cost of writing a byte to local FS;" + + " expressed as a multiple of NETWORK cost"), + HIVE_CBO_COST_MODEL_LFS_READ("hive.cbo.costmodel.local.fs.read", "4.0", "Default cost of reading a byte from local FS;" + + " expressed as a multiple of NETWORK cost"), + HIVE_CBO_COST_MODEL_HDFS_WRITE("hive.cbo.costmodel.hdfs.write", "10.0", "Default cost of writing a byte to HDFS;" + + " expressed as a multiple of Local FS write cost"), + HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" + + " expressed as a multiple of Local FS read cost"), + HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true, + "Toggle display of CBO warnings like missing column stats"), + AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"), + SEMIJOIN_CONVERSION("hive.optimize.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"), + HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align " + + "columns in operators such as Aggregate or Join so that we try to reduce the number of shuffling stages"), + + // materialized views + HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING("hive.materializedview.rewriting", false, + "Whether to try to rewrite queries using the materialized views enabled for rewriting"), + HIVE_MATERIALIZED_VIEW_FILE_FORMAT("hive.materializedview.fileformat", "ORC", + new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"), + "Default file format for CREATE MATERIALIZED VIEW statement"), + HIVE_MATERIALIZED_VIEW_SERDE("hive.materializedview.serde", + "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "Default SerDe used for materialized views"), + + // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.rows, + // needs to be removed by Hive 0.13.
Also, do not change the default (see SMB operator) + HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""), + + HIVEMAPJOINUSEOPTIMIZEDTABLE("hive.mapjoin.optimized.hashtable", true, + "Whether Hive should use a memory-optimized hash table for MapJoin.\n" + + "Only works on Tez and Spark, because the memory-optimized hashtable cannot be serialized."), + HIVEMAPJOINOPTIMIZEDTABLEPROBEPERCENT("hive.mapjoin.optimized.hashtable.probe.percent", + (float) 0.5, "Probing space percentage of the optimized hashtable"), + HIVEUSEHYBRIDGRACEHASHJOIN("hive.mapjoin.hybridgrace.hashtable", true, "Whether to use hybrid " + + "grace hash join as the join method for mapjoin. Tez only."), + HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " + + "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " + + "This number should be a power of 2."), + HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace " + + "hash join, the minimum write buffer size used by the optimized hashtable. Default is 512 KB."), + HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For " + + "hybrid grace hash join, the minimum number of partitions to create."), + HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 8 * 1024 * 1024, + "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" + + "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" + + "joins unnecessary memory will be allocated and then trimmed."), + HIVEHYBRIDGRACEHASHJOINBLOOMFILTER("hive.mapjoin.hybridgrace.bloomfilter", true, "Whether to " + + "use BloomFilter in hybrid grace hash join to minimize unnecessary spilling."), + + HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000, + "How many rows with the same key value should be cached in memory per smb joined table."), + HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000, + "Number of rows after which the size of the grouping keys/aggregation classes is checked"), + HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5, + "Portion of total memory to be used by the map-side group aggregation hash table"), + HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3, + "Portion of total memory to be used by the map-side group aggregation hash table, when this group by is followed by a map join"), + HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9, + "The max memory to be used by the map-side group aggregation hash table.\n" + + "If the memory usage is higher than this number, force a flush of the data"), + HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5, + "Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" + + "Set to 1 to make sure hash aggregation is never turned off."), + HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, + "Whether to optimize a multi group by query to generate a single M/R job plan. If the multi group by query has \n" + + "common group by keys, it will be optimized to generate a single M/R job."), + HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true, + "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + + "the group by in the mapper by using BucketizedHiveInputFormat.
The only downside to this\n" + + "is that it limits the number of mappers to the number of files."), + HIVE_GROUPBY_POSITION_ALIAS("hive.groupby.position.alias", false, + "Whether to enable using Column Position Alias in Group By"), + HIVE_ORDERBY_POSITION_ALIAS("hive.orderby.position.alias", true, + "Whether to enable using Column Position Alias in Order By"), + @Deprecated + HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false, + "Whether to enable using Column Position Alias in Group By or Order By (deprecated).\n" + + "Use " + HIVE_ORDERBY_POSITION_ALIAS.varname + " or " + HIVE_GROUPBY_POSITION_ALIAS.varname + " instead"), + HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30, + "Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.\n" + + "For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;\n" + + "4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), (null, null, null).\n" + + "This can lead to explosion across map-reduce boundary if the cardinality of T is very high,\n" + + "and map-side aggregation does not do a very good job. \n" + + "\n" + + "This parameter decides if Hive should add an additional map-reduce job. If the grouping set\n" + + "cardinality (4 in the example above), is more than this value, a new MR job is added under the\n" + + "assumption that the original group by will reduce the data size."), + HIVE_GROUPBY_LIMIT_EXTRASTEP("hive.groupby.limit.extrastep", true, "This parameter decides if Hive should \n" + + "create new MR job for sorting final output"), + + // Max file num and size used to do a single copy (after that, distcp is used) + HIVE_EXEC_COPYFILE_MAXNUMFILES("hive.exec.copyfile.maxnumfiles", 1L, + "Maximum number of files Hive uses to do sequential HDFS copies between directories." + + "Distributed copies (distcp) will be used instead for larger numbers of files so that copies can be done faster."), + HIVE_EXEC_COPYFILE_MAXSIZE("hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/, + "Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories." + + "Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."), + + // for hive udtf operator + HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false, + "Whether Hive should automatically send progress information to TaskTracker \n" + + "when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious \n" + + "because this may prevent TaskTracker from killing tasks with infinite loops."), + + HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC", "parquet"), + "Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"), + HIVEDEFAULTMANAGEDFILEFORMAT("hive.default.fileformat.managed", "none", + new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC", "parquet"), + "Default file format for CREATE TABLE statement applied to managed tables only. External tables will be \n" + + "created with format specified by hive.default.fileformat. 
Leaving this null will result in using hive.default.fileformat \n" + + "for all tables."), + HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "SequenceFile", new StringSet("TextFile", "SequenceFile", "RCfile", "Llap"), + "Default file format for storing result of the query."), + HIVECHECKFILEFORMAT("hive.fileformat.check", true, "Whether to check file format or not when loading data files"), + + // default serde for rcfile + HIVEDEFAULTRCFILESERDE("hive.default.rcfile.serde", + "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe", + "The default SerDe Hive will use for the RCFile format"), + + HIVEDEFAULTSERDE("hive.default.serde", + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "The default SerDe Hive will use for storage formats that do not specify a SerDe."), + + SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema", + "org.apache.hadoop.hive.ql.io.orc.OrcSerde," + + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," + + "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + + "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," + + "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + + "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," + + "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", + "SerDes retrieving schema from metastore. This is an internal parameter."), + + HIVEHISTORYFILELOC("hive.querylog.location", + "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", + "Location of Hive run time structured log file"), + + HIVE_LOG_INCREMENTAL_PLAN_PROGRESS("hive.querylog.enable.plan.progress", true, + "Whether to log the plan's progress every time a job's progress is checked.\n" + + "These logs are written to the location specified by hive.querylog.location"), + + HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL("hive.querylog.plan.progress.interval", "60000ms", + new TimeValidator(TimeUnit.MILLISECONDS), + "The interval to wait between logging the plan's progress.\n" + + "If there is a whole number percentage change in the progress of the mappers or the reducers,\n" + + "the progress is logged regardless of this value.\n" + + "The actual interval will be the ceiling of (this value divided by the value of\n" + + "hive.exec.counters.pull.interval) multiplied by the value of hive.exec.counters.pull.interval\n" + + "I.e. if it is not divide evenly by the value of hive.exec.counters.pull.interval it will be\n" + + "logged less frequently than specified.\n" + + "This only has an effect if hive.querylog.enable.plan.progress is set to true."), + + HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + "The default SerDe for transmitting input data to and reading output data from the user scripts. "), + HIVESCRIPTRECORDREADER("hive.script.recordreader", + "org.apache.hadoop.hive.ql.exec.TextRecordReader", + "The default record reader for reading data from the user scripts. "), + HIVESCRIPTRECORDWRITER("hive.script.recordwriter", + "org.apache.hadoop.hive.ql.exec.TextRecordWriter", + "The default record writer for writing data to the user scripts. "), + HIVESCRIPTESCAPE("hive.transform.escape.input", false, + "This adds an option to escape special chars (newlines, carriage returns and\n" + + "tabs) when they are passed to the user script. 
This is useful if the Hive tables\n" + + "can contain data that includes special characters."), + HIVEBINARYRECORDMAX("hive.binary.record.max.length", 1000, + "Read from a binary stream and treat each hive.binary.record.max.length bytes as a record. \n" + + "The last record before the end of stream can have fewer than hive.binary.record.max.length bytes"), + + HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0, "mapper/reducer memory in local mode"), + + //small table file size + HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize", 25000000L, + "The threshold for the input file size of the small tables; if the file size is smaller \n" + + "than this threshold, Hive will try to convert the common join into a map join"), + + + HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", true, + "Use schema evolution to convert self-describing file format's data to the schema desired by the reader."), + + HIVE_TRANSACTIONAL_TABLE_SCAN("hive.transactional.table.scan", false, + "internal usage only -- do transaction (ACID) table scan.", true), + + HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY("hive.transactional.events.mem", 10000000, + "Vectorized ACID readers can often load all the delete events from all the delete deltas\n" + + "into memory to optimize for performance. To prevent out-of-memory errors, this is a rough heuristic\n" + + "that limits the total number of delete events that can be loaded into memory at once.\n" + + "Roughly it has been set to 10 million delete events per bucket (~160 MB).\n"), + + HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0, + "A seed number used for percentage sampling. By changing this number, the user changes the subsets of data sampled."), + + // test
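For orientation, the ConfVars entries above are normally consumed through HiveConf's typed accessors rather than by raw string key. The following is a minimal sketch only, assuming the getBoolVar/getLongVar/setBoolVar accessors that HiveConf exposes for these enum constants; the class name ConfVarsExample is illustrative and not part of this patch.

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class ConfVarsExample {
      public static void main(String[] args) {
        // Defaults come from the ConfVars definitions above, overridden by any
        // hive-site.xml found on the classpath.
        HiveConf conf = new HiveConf();

        // Typed reads resolve the declared defaults (true and 25000000L above).
        boolean cboEnabled = conf.getBoolVar(ConfVars.HIVE_CBO_ENABLED);
        long smallTableBytes = conf.getLongVar(ConfVars.HIVESMALLTABLESFILESIZE);

        // Typed writes keep the key name and value type consistent with the definition.
        conf.setBoolVar(ConfVars.HIVEMAPSIDEAGGREGATE, false);

        System.out.println("hive.cbo.enable=" + cboEnabled
            + ", hive.mapjoin.smalltable.filesize=" + smallTableBytes
            + ", hive.map.aggr=" + conf.getBoolVar(ConfVars.HIVEMAPSIDEAGGREGATE));
      }
    }

Using the enum constants keeps key names and value types in one place, which is why the descriptions above live next to the defaults rather than in external documentation.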