hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
Subject [09/10] hive git commit: HIVE-12290 Native Vector ReduceSink (Matt McCline, reviewed by Gopal V)
Date Sun, 01 Nov 2015 04:32:54 GMT
diff --git a/common/src/java/org/apache/hadoop/hive/conf/ b/common/src/java/org/apache/hadoop/hive/conf/
new file mode 100644
index 0000000..f05f224
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/conf/
@@ -0,0 +1,3369 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.conf;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
+import org.apache.hadoop.hive.conf.Validator.PatternSet;
+import org.apache.hadoop.hive.conf.Validator.RangeValidator;
+import org.apache.hadoop.hive.conf.Validator.RatioValidator;
+import org.apache.hadoop.hive.conf.Validator.StringSet;
+import org.apache.hadoop.hive.conf.Validator.TimeValidator;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.hive.shims.Utils;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.Shell;
+import org.apache.hive.common.HiveCompat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+/**
+ * Hive Configuration.
+ */
+public class HiveConf extends Configuration {
+  protected String hiveJar;
+  protected Properties origProp;
+  protected String auxJars;
+  private static final Logger l4j = LoggerFactory.getLogger(HiveConf.class);
+  private static boolean loadMetastoreConfig = false;
+  private static boolean loadHiveServer2Config = false;
+  private static URL hiveDefaultURL = null;
+  private static URL hiveSiteURL = null;
+  private static URL hivemetastoreSiteUrl = null;
+  private static URL hiveServer2SiteUrl = null;
+  private static byte[] confVarByteArray = null;
+  private static final Map<String, ConfVars> vars = new HashMap<String, ConfVars>();
+  private static final Map<String, ConfVars> metaConfs = new HashMap<String, ConfVars>();
+  private final List<String> restrictList = new ArrayList<String>();
+  private final Set<String> hiddenSet = new HashSet<String>();
+  private Pattern modWhiteListPattern = null;
+  private volatile boolean isSparkConfigUpdated = false;
+  private static final int LOG_PREFIX_LENGTH = 64;
+  /**
+   * Returns the current value of the {@code isSparkConfigUpdated} flag.
+   * The backing field is declared {@code volatile}, so the value most recently
+   * written by any thread is the one returned.
+   * NOTE(review): presumably the flag marks that Spark-related settings were
+   * changed on this conf -- confirm against the callers.
+   *
+   * @return the current flag value
+   */
+  public boolean getSparkConfigUpdated() {
+    return isSparkConfigUpdated;
+  }
+  /**
+   * Overwrites the {@code isSparkConfigUpdated} flag with the given value.
+   * The backing field is declared {@code volatile}, so the write becomes
+   * visible to other threads that subsequently read the flag.
+   *
+   * @param isSparkConfigUpdated the new flag value
+   */
+  public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
+    this.isSparkConfigUpdated = isSparkConfigUpdated;
+  }
+  static {
+    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+    if (classLoader == null) {
+      classLoader = HiveConf.class.getClassLoader();
+    }
+    hiveDefaultURL = classLoader.getResource("hive-default.xml");
+    // Look for hive-site.xml on the CLASSPATH and log its location if found.
+    hiveSiteURL = classLoader.getResource("hive-site.xml");
+    hivemetastoreSiteUrl = classLoader.getResource("hivemetastore-site.xml");
+    hiveServer2SiteUrl = classLoader.getResource("hiveserver2-site.xml");
+    for (ConfVars confVar : ConfVars.values()) {
+      vars.put(confVar.varname, confVar);
+    }
+  }
+  /**
+   * Metastore-related options that the db is initialized against. When a conf
+   * var in this list is changed, the metastore instance for the CLI will
+   * be recreated so that the change will take effect.
+   */
+  public static final HiveConf.ConfVars[] metaVars = {
+    ConfVars.METASTOREWAREHOUSE,
+    ConfVars.METASTOREURIS,
+    ConfVars.METASTORE_SERVER_PORT,
+    ConfVars.METASTOREPWD,
+    ConfVars.METASTORE_TCP_KEEP_ALIVE,
+    ConfVars.METASTORE_INT_ORIGINAL,
+    ConfVars.METASTORE_INT_ARCHIVED,
+    ConfVars.METASTORE_CACHE_LEVEL2,
+    ConfVars.METASTORE_FILTER_HOOK,
+    ConfVars.METASTORE_RAW_STORE_IMPL,
+    ConfVars.METASTORE_INIT_HOOKS,
+    ConfVars.HMSHANDLERATTEMPTS,
+    ConfVars.HMSHANDLERINTERVAL,
+    ConfVars.USERS_IN_ADMIN_ROLE,
+    ConfVars.HIVE_TXN_MANAGER,
+    ConfVars.HIVE_TXN_TIMEOUT,
+    ConfVars.HIVE_TXN_MAX_OPEN_BATCH,
+    ConfVars.METASTORE_FASTPATH
+  };
+  /**
+   * User-configurable Metastore vars.
+   */
+  public static final HiveConf.ConfVars[] metaConfVars = {
+    ConfVars.METASTORE_TRY_DIRECT_SQL
+  };
+  static {
+    for (ConfVars confVar : metaConfVars) {
+      metaConfs.put(confVar.varname, confVar);
+    }
+  }
+  /**
+   * dbVars are the parameters that can be set per database. If these
+   * parameters are set as a database property, then on switching to that
+   * database the HiveConf variable will be changed. Changing these
+   * parameters effectively changes the DFS and MapReduce clusters
+   * used for different databases.
+   */
+  public static final HiveConf.ConfVars[] dbVars = {
+    ConfVars.HADOOPBIN,
+    ConfVars.SCRATCHDIR
+  };
+  /**
+   * ConfVars.
+   *
+   * These are the default configuration properties for Hive. Each HiveConf
+   * object is initialized as follows:
+   *
+   * 1) Hadoop configuration properties are applied.
+   * 2) ConfVar properties with non-null values are overlayed.
+   * 3) hive-site.xml properties are overlayed.
+   *
+   * WARNING: think twice before adding any Hadoop configuration properties
+   * with non-null values to this list as they will override any values defined
+   * in the underlying Hadoop configuration.
+   */
+  public static enum ConfVars {
+    // QL execution stuff
+    SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""),
+    PLAN("hive.exec.plan", "", ""),
+    PLAN_SERIALIZATION("hive.plan.serialization.format", "kryo",
+        "Query plan format serialization between client and task nodes. \n" +
+        "Two supported values are : kryo and javaXML. Kryo is default."),
+    STAGINGDIR("hive.exec.stagingdir", ".hive-staging",
+        "Directory name that will be created inside table locations in order to support HDFS encryption. " +
+        "This is replaces ${hive.exec.scratchdir} for query results with the exception of read-only tables. " +
+        "In all cases ${hive.exec.scratchdir} is still used for other temporary files, such as job plans."),
+    SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive",
+        "HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. " +
+        "For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/<username> is created, " +
+        "with ${hive.scratch.dir.permission}."),
+    LOCALSCRATCHDIR("hive.exec.local.scratchdir",
+        "${}" + File.separator + "${}",
+        "Local scratch space for Hive jobs"),
+    DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir",
+        "${}" + File.separator + "${}_resources",
+        "Temporary local directory for added resources in the remote file system."),
+    SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700",
+        "The permission for the user specific scratch directories that get created."),
+    SUBMITVIACHILD("hive.exec.submitviachild", false, ""),
+    SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true,
+        "Determines whether local tasks (typically mapjoin hashtable generation phase) runs in \n" +
+        "separate JVM (true recommended) or not. \n" +
+        "Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues."),
+    SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000,
+        "Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task). \n" +
+        "This prevents runaway scripts from filling logs partitions to capacity"),
+    ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false,
+        "When enabled, this option allows a user script to exit successfully without consuming \n" +
+        "all the data from the standard input."),
+    STREAMREPORTERPERFIX("stream.stderr.reporter.prefix", "reporter:",
+        "Streaming jobs that log to standard error with this prefix can log counter or status information."),
+    STREAMREPORTERENABLED("stream.stderr.reporter.enabled", true,
+        "Enable consumption of status and counter messages for streaming jobs."),
+    COMPRESSRESULT("hive.exec.compress.output", false,
+        "This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. \n" +
+        "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"),
+    COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false,
+        "This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. \n" +
+        "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"),
+    COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", "", ""),
+    COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", "", ""),
+    BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (256 * 1000 * 1000),
+        "size per reducer.The default is 256Mb, i.e if the input size is 1G, it will use 4 reducers."),
+    MAXREDUCERS("hive.exec.reducers.max", 1009,
+        "max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is\n" +
+        "negative, Hive will use this one as the max number of reducers when automatically determine number of reducers."),
+    PREEXECHOOKS("hive.exec.pre.hooks", "",
+        "Comma-separated list of pre-execution hooks to be invoked for each statement. \n" +
+        "A pre-execution hook is specified as the name of a Java class which implements the \n" +
+        "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."),
+    POSTEXECHOOKS("", "",
+        "Comma-separated list of post-execution hooks to be invoked for each statement. \n" +
+        "A post-execution hook is specified as the name of a Java class which implements the \n" +
+        "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."),
+    ONFAILUREHOOKS("hive.exec.failure.hooks", "",
+        "Comma-separated list of on-failure hooks to be invoked for each statement. \n" +
+        "An on-failure hook is specified as the name of Java class which implements the \n" +
+        "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."),
+    QUERYREDACTORHOOKS("hive.exec.query.redactor.hooks", "",
+        "Comma-separated list of hooks to be invoked for each query which can \n" +
+        "tranform the query before it's placed in the job.xml file. Must be a Java class which \n" +
+        "extends from the org.apache.hadoop.hive.ql.hooks.Redactor abstract class."),
+    CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", "",
+        "Comma-separated list of statistics publishers to be invoked on counters on each job. \n" +
+        "A client stats publisher is specified as the name of a Java class which implements the \n" +
+        "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."),
+    EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in parallel"),
+    EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8,
+        "How many jobs at most can be executed in parallel"),
+    HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true,
+        "Whether speculative execution for reducers should be turned on. "),
+    HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L,
+        "The interval with which to poll the JobTracker for the counters the running job. \n" +
+        "The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be."),
+    DYNAMICPARTITIONING("hive.exec.dynamic.partition", true,
+        "Whether or not to allow dynamic partitions in DML/DDL."),
+    DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict",
+        "In strict mode, the user must specify at least one static partition\n" +
+        "in case the user accidentally overwrites all partitions.\n" +
+        "In nonstrict mode all partitions are allowed to be dynamic."),
+    DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000,
+        "Maximum number of dynamic partitions allowed to be created in total."),
+    DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100,
+        "Maximum number of dynamic partitions allowed to be created in each mapper/reducer node."),
+    MAXCREATEDFILES("hive.exec.max.created.files", 100000L,
+        "Maximum number of HDFS files created by all mappers/reducers in a MapReduce job."),
+        "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" +
+        "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" +
+        "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."),
+    // Whether to show a link to the most failed task + debugging tips
+        "If a job fails, whether to provide a link in the CLI to the task with the\n" +
+        "most failures, along with debugging hints if applicable."),
+    JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true,
+        "Whether or not stack traces parsed from the task logs of a sampled failed task \n" +
+        "for each failed job should be stored in the SessionState"),
+    JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000, ""),
+    TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000, ""),
+    OUTPUT_FILE_EXTENSION("hive.output.file.extension", null,
+        "String used as a file extension for output files. \n" +
+        "If not set, defaults to the codec extension for text files (e.g. \".gz\"), or no extension otherwise."),
+    HIVE_IN_TEST("", false, "internal usage only, true in test mode", true),
+    HIVE_IN_TEZ_TEST("", false, "internal use only, true when in testing tez",
+        true),
+    LOCALMODEAUTO("", false,
+        "Let Hive determine whether to run in local mode automatically"),
+    LOCALMODEMAXBYTES("", 134217728L,
+        "When is true, input bytes should less than this for local mode."),
+        "When is true, the number of tasks should less than this for local mode."),
+    DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true,
+        "Do not report an error if DROP TABLE/VIEW/Index/Function specifies a non-existent table/view/index/function"),
+    HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true, "Ignore the mapjoin hint"),
+    HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100,
+        "maximum number of lines for footer user can define for a table file"),
+    HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES("hive.resultset.use.unique.column.names", true,
+        "Make column names unique in the result set by qualifying column names with table alias if needed.\n" +
+        "Table alias will be added to column names for queries of type \"select *\" or \n" +
+        "if query explicitly uses table alias \"select r1.x..\"."),
+    // Hadoop Configuration Properties
+    // Properties with null values are ignored and exist only for the purpose of giving us
+    // a symbolic name to reference in the Hive source code. Properties with non-null
+    // values will override any values set in the underlying Hadoop configuration.
+    HADOOPBIN("hadoop.bin.path", findHadoopBinary(), "", true),
+    HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem",
+        "The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop versions less than 0.20"),
+    HADOOPFS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPFS"), null, "", true),
+    HADOOPMAPFILENAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPFILENAME"), null, "", true),
+    HADOOPMAPREDINPUTDIR(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIR"), null, "", true),
+    HADOOPMAPREDINPUTDIRRECURSIVE(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPMAPREDINPUTDIRRECURSIVE"), false, "", true),
+    MAPREDMAXSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMAXSPLITSIZE"), 256000000L, "", true),
+    MAPREDMINSPLITSIZE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"), 1L, "", true),
+    MAPREDMINSPLITSIZEPERNODE(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERNODE"), 1L, "", true),
+    MAPREDMINSPLITSIZEPERRACK(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZEPERRACK"), 1L, "", true),
+    // The number of reduce tasks per job. Hadoop sets this value to 1 by default
+    // By setting this property to -1, Hive will automatically determine the correct
+    // number of reducers.
+    HADOOPNUMREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPNUMREDUCERS"), -1, "", true),
+    HADOOPJOBNAME(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPJOBNAME"), null, "", true),
+    HADOOPSPECULATIVEEXECREDUCERS(ShimLoader.getHadoopShims().getHadoopConfNames().get("HADOOPSPECULATIVEEXECREDUCERS"), true, "", true),
+    MAPREDSETUPCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false, "", true),
+    MAPREDTASKCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false, "", true),
+    // Metastore stuff. Be sure to update HiveConf.metaVars when you add something here!
+    METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse",
+        "location of default database for the warehouse"),
+    METASTOREURIS("hive.metastore.uris", "",
+        "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."),
+    METASTORE_FASTPATH("hive.metastore.fastpath", false,
+        "Used to avoid all of the proxies and object copies in the metastore.  Note, if this is " +
+            "set, you MUST use a local metastore (hive.metastore.uris must be empty) otherwise " +
+            "undefined and most likely undesired behavior will result"),
+    METASTORE_HBASE_CATALOG_CACHE_SIZE("hive.metastore.hbase.catalog.cache.size", 50000, "Maximum number of " +
+        "objects we will place in the hbase metastore catalog cache.  The objects will be divided up by " +
+        "types that we need to cache."),
+    METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.hbase.aggregate.stats.cache.size", 10000,
+        "Maximum number of aggregate stats nodes that we will place in the hbase metastore aggregate stats cache."),
+    METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.hbase.aggregate.stats.max.partitions", 10000,
+        "Maximum number of partitions that are aggregated per cache node."),
+    METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY("hive.metastore.hbase.aggregate.stats.false.positive.probability",
+        (float) 0.01, "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."),
+    METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.hbase.aggregate.stats.max.variance", (float) 0.1,
+        "Maximum tolerable variance in number of partitions between a cached node and our request (default 10%)."),
+    METASTORE_HBASE_CACHE_TIME_TO_LIVE("hive.metastore.hbase.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS),
+        "Number of seconds for a cached node to be active in the cache before they become stale."),
+    METASTORE_HBASE_CACHE_MAX_WRITER_WAIT("hive.metastore.hbase.cache.max.writer.wait", "5000ms", new TimeValidator(TimeUnit.MILLISECONDS),
+        "Number of milliseconds a writer will wait to acquire the writelock before giving up."),
+    METASTORE_HBASE_CACHE_MAX_READER_WAIT("hive.metastore.hbase.cache.max.reader.wait", "1000ms", new TimeValidator(TimeUnit.MILLISECONDS),
+         "Number of milliseconds a reader will wait to acquire the readlock before giving up."),
+    METASTORE_HBASE_CACHE_MAX_FULL("hive.metastore.hbase.cache.max.full", (float) 0.9,
+         "Maximum cache full % after which the cache cleaner thread kicks in."),
+    METASTORE_HBASE_CACHE_CLEAN_UNTIL("hive.metastore.hbase.cache.clean.until", (float) 0.8,
+          "The cleaner thread cleans until cache reaches this % full size."),
+    METASTORE_HBASE_CONNECTION_CLASS("hive.metastore.hbase.connection.class",
+        "org.apache.hadoop.hive.metastore.hbase.VanillaHBaseConnection",
+        "Class used to connection to HBase"),
+    METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES("hive.metastore.hbase.aggr.stats.cache.entries",
+        10000, "How many in stats objects to cache in memory"),
+    METASTORE_HBASE_AGGR_STATS_MEMORY_TTL("hive.metastore.hbase.aggr.stats.memory.ttl", "60s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "Number of seconds stats objects live in memory after they are read from HBase."),
+        "hive.metastore.hbase.aggr.stats.invalidator.frequency", "5s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "How often the stats cache scans its HBase entries and looks for expired entries"),
+    METASTORE_HBASE_AGGR_STATS_HBASE_TTL("hive.metastore.hbase.aggr.stats.hbase.ttl", "604800s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "Number of seconds stats entries live in HBase cache after they are created.  They may be" +
+            " invalided by updates or partition drops before this.  Default is one week."),
+    METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3,
+        "Number of retries while opening a connection to metastore"),
+    METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1,
+        "Number of retries upon failure of Thrift metastore calls"),
+    METASTORE_SERVER_PORT("hive.metastore.port", 9083, "Hive metastore listener port"),
+    METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", "1s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "Number of seconds for the client to wait between consecutive connection attempts"),
+    METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", "600s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "MetaStore Client socket timeout in seconds"),
+    METASTORE_CLIENT_SOCKET_LIFETIME("hive.metastore.client.socket.lifetime", "0s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" +
+        "reconnects on the next MetaStore operation. A value of 0s means the connection\n" +
+        "has an infinite lifetime."),
+    METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine",
+        "password to use against metastore database"),
+    METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", "",
+        "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"),
+    METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true,
+        "Set this to true if multiple threads access metastore through JDO concurrently."),
+    METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL",
+        "jdbc:derby:;databaseName=metastore_db;create=true",
+        "JDBC connect string for a JDBC metastore"),
+    HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 10,
+        "The number of times to retry a HMSHandler call if there were a connection error."),
+    HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", "2000ms",
+        new TimeValidator(TimeUnit.MILLISECONDS), "The time between HMSHandler retry attempts on failure."),
+    HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false,
+        "Whether to force reloading of the HMSHandler configuration (including\n" +
+        "the connection URL, before the next metastore query that accesses the\n" +
+        "datastore. Once reloaded, this value is reset to false. Used for\n" +
+        "testing only."),
+    METASTORESERVERMAXMESSAGESIZE("hive.metastore.server.max.message.size", 100*1024*1024,
+        "Maximum message size in bytes a HMS will accept."),
+    METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200,
+        "Minimum number of worker threads in the Thrift server's pool."),
+    METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 1000,
+        "Maximum number of worker threads in the Thrift server's pool."),
+    METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true,
+        "Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."),
+    METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original",
+        "Intermediate dir suffixes used for archiving. Not important what they\n" +
+        "are, as long as collisions are avoided"),
+    METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived",
+    METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted",
+    METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", "",
+        "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."),
+    METASTORE_KERBEROS_PRINCIPAL("hive.metastore.kerberos.principal",
+        "hive-metastore/_HOST@EXAMPLE.COM",
+        "The service principal for the metastore Thrift server. \n" +
+        "The special string _HOST will be replaced automatically with the correct host name."),
+    METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false,
+        "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."),
+    METASTORE_USE_THRIFT_FRAMED_TRANSPORT("hive.metastore.thrift.framed.transport.enabled", false,
+        "If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."),
+    METASTORE_USE_THRIFT_COMPACT_PROTOCOL("hive.metastore.thrift.compact.protocol.enabled", false,
+        "If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" +
+        "Setting it to true will break compatibility with older clients running TBinaryProtocol."),
+        "org.apache.hadoop.hive.thrift.MemoryTokenStore",
+        "The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster."),
+        "", "",
+        "The ZooKeeper token store connect string. You can re-use the configuration value\n" +
+        "set in hive.zookeeper.quorum, by leaving this parameter unset."),
+        "", "/hivedelegation",
+        "The root path for token store data. Note that this is used by both HiveServer2 and\n" +
+        "MetaStore to store delegation Token. One directory gets created for each of them.\n" +
+        "The final directory names would have the servername appended to it (HIVESERVER2,\n" +
+        "METASTORE)."),
+        "", "",
+        "ACL for token store entries. Comma separated list of ACL entries. For example:\n" +
+        "sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa\n" +
+        "Defaults to all permissions for the hiveserver2/metastore process user."),
+    METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order",
+        "List of comma separated metastore object types that should be pinned in the cache"),
+    METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "BONECP",
+        "Specify connection pool library for datanucleus"),
+    METASTORE_VALIDATE_TABLES("datanucleus.validateTables", false,
+        "validates existing schema against code. turn this on if you want to verify existing schema"),
+    METASTORE_VALIDATE_COLUMNS("datanucleus.validateColumns", false,
+        "validates existing schema against code. turn this on if you want to verify existing schema"),
+    METASTORE_VALIDATE_CONSTRAINTS("datanucleus.validateConstraints", false,
+        "validates existing schema against code. turn this on if you want to verify existing schema"),
+    METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms", "metadata store type"),
+    METASTORE_AUTO_CREATE_SCHEMA("datanucleus.autoCreateSchema", true,
+        "creates necessary schema on a startup if one doesn't exist. set this to false, after creating it once"),
+    METASTORE_FIXED_DATASTORE("datanucleus.fixedDatastore", false, ""),
+    METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", false,
+        "Enforce metastore schema version consistency.\n" +
+        "True: Verify that version information stored in metastore matches with one from Hive jars.  Also disable automatic\n" +
+        "      schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures\n" +
+        "      proper metastore schema migration. (Default)\n" +
+        "False: Warn if the version information stored in metastore doesn't match with one from in Hive jars."),
+    METASTORE_SCHEMA_VERIFICATION_RECORD_VERSION("hive.metastore.schema.verification.record.version", true,
+      "When true the current MS version is recorded in the VERSION table. If this is disabled and verification is\n" +
+      " enabled the MS will be unusable."),
+    METASTORE_AUTO_START_MECHANISM_MODE("datanucleus.autoStartMechanismMode", "checked",
+        "throw exception if metadata tables are incorrect"),
+    METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed",
+        "Default transaction isolation level for identity generation."),
+    METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false,
+        "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"),
+    METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none", ""),
+    METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus1",
+        "Name of the identifier factory to use when generating table/column names etc. \n" +
+        "'datanucleus1' is used for backward compatibility with DataNucleus v1"),
+    METASTORE_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""),
+    METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG",
+        "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"),
+    METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300,
+        "Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. \n" +
+        "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" +
+        "but it may also cause higher memory requirement at the client side."),
+        "hive.metastore.batch.retrieve.table.partition.max", 1000,
+        "Maximum number of objects that metastore internally retrieves in one batch."),
+    METASTORE_INIT_HOOKS("hive.metastore.init.hooks", "",
+        "A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" +
+        "An init hook is specified as the name of Java class which extends org.apache.hadoop.hive.metastore.MetaStoreInitListener."),
+    METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", "",
+        "List of comma separated listeners for metastore events."),
+    METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", "", ""),
+    METASTORE_EVENT_DB_LISTENER_TTL("hive.metastore.event.db.listener.timetolive", "86400s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "time after which events will be removed from the database listener queue"),
+        "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" +
+        "for operations like drop-partition (disallow the drop-partition if the user in\n" +
+        "question doesn't have permissions to delete the corresponding directory\n" +
+        "on the storage)."),
+    METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq", "0s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "Frequency at which timer task runs to purge expired events in metastore."),
+    METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration", "0s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "Duration after which events expire from events table"),
+    METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", true,
+        "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" +
+        "the client's reported user and group permissions. Note that this property must be set on \n" +
+        "both the client and server sides. Further note that its best effort. \n" +
+        "If client sets its to true and server sets it to false, client setting will be ignored."),
+        "Partition names will be checked against this regex pattern and rejected if not matched."),
+    METASTORE_INTEGER_JDO_PUSHDOWN("hive.metastore.integral.jdo.pushdown", false,
+        "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" +
+        "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" +
+        "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" +
+        "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" +
+        "is also irrelevant."),
+        "Whether the Hive metastore should try to use direct SQL queries instead of the\n" +
+        "DataNucleus for certain read paths. This can improve metastore performance when\n" +
+        "fetching many partitions or column statistics by orders of magnitude; however, it\n" +
+        "is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" +
+        "the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" +
+        "work for all queries on your datastore. If all SQL queries fail (for example, your\n" +
+        "metastore is backed by MongoDB), you might want to disable this to save the\n" +
+        "try-and-fall-back cost."),
+        "Batch size for partition and other object retrieval from the underlying DB in direct\n" +
+        "SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" +
+        "that necessitate this. For DBs that can handle the queries, this isn't necessary and\n" +
+        "may impede performance. -1 means no batching, 0 means automatic batching."),
+        "Same as, for read statements within a transaction that\n" +
+        "modifies metastore data. Due to non-standard behavior in Postgres, if a direct SQL\n" +
+        "select query has incorrect syntax or something similar inside a transaction, the\n" +
+        "entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" +
+        "should disable the usage of direct SQL inside transactions if that happens in your case."),
+    METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false,
+        "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " +
+        "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " +
+        "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " +
+        "pruning is the correct behaviour"),
+        "hive.metastore.disallow.incompatible.col.type.changes", false,
+        "If true (default is false), ALTER TABLE operations which change the type of a\n" +
+        "column (say STRING) to an incompatible type (say MAP) are disallowed.\n" +
+        "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" +
+        "datatypes can be converted from string to any type. The map is also serialized as\n" +
+        "a string, which can be read as a string as well. However, with any binary\n" +
+        "serialization, this is not true. Blocking the ALTER TABLE prevents ClassCastExceptions\n" +
+        "when subsequently trying to access old partitions.\n" +
+        "\n" +
+        "Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" +
+        "not blocked.\n" +
+        "\n" +
+        "See HIVE-4409 for more details."),
+    NEWTABLEDEFAULTPARA("hive.table.parameters.default", "",
+        "Default property values for newly created tables"),
+        "Table Properties to copy over when executing a Create Table Like."),
+    METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore",
+        "Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. \n" +
+        "This class is used to store and retrieval of raw metadata objects such as table, database"),
+    METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver",
+        "Driver class name for a JDBC metastore"),
+    METASTORE_MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass",
+        "org.datanucleus.api.jdo.JDOPersistenceManagerFactory",
+        "class implementing the jdo persistence"),
+    METASTORE_EXPRESSION_PROXY_CLASS("hive.metastore.expression.proxy",
+        "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore", ""),
+    METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true,
+        "Detaches all objects from session so that they can be used after transaction is committed"),
+    METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true,
+        "Reads outside of transactions"),
+    METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP",
+        "Username to use against metastore database"),
+    METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", "",
+        "List of comma separated listeners for the end of metastore functions."),
+        "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" +
+        "* implies all the keys will get inherited."),
+    METASTORE_FILTER_HOOK("hive.metastore.filter.hook", "org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl",
+        "Metastore hook class for filtering the metadata read results. If"
+        + "is set to instance of HiveAuthorizerFactory, then this value is ignored."),
+    FIRE_EVENTS_FOR_DML("", false, "If true, the metastore will be asked" +
+        " to fire events for DML operations"),
+    METASTORE_CLIENT_DROP_PARTITIONS_WITH_EXPRESSIONS("hive.metastore.client.drop.partitions.using.expressions", true,
+        "Choose whether dropping partitions with HCatClient pushes the partition-predicate to the metastore, " +
+            "or drops partitions iteratively"),
+    METASTORE_AGGREGATE_STATS_CACHE_ENABLED("hive.metastore.aggregate.stats.cache.enabled", true,
+        "Whether aggregate stats caching is enabled or not."),
+    METASTORE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.aggregate.stats.cache.size", 10000,
+        "Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."),
+    METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.aggregate.stats.cache.max.partitions", 10000,
+        "Maximum number of partitions that are aggregated per cache node."),
+    METASTORE_AGGREGATE_STATS_CACHE_FPP("hive.metastore.aggregate.stats.cache.fpp", (float) 0.01,
+        "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."),
+    METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.aggregate.stats.cache.max.variance", (float) 0.01,
+        "Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."),
+    METASTORE_AGGREGATE_STATS_CACHE_TTL("hive.metastore.aggregate.stats.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS),
+        "Number of seconds for a cached node to be active in the cache before they become stale."),
+    METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("hive.metastore.aggregate.stats.cache.max.writer.wait", "5000ms",
+        new TimeValidator(TimeUnit.MILLISECONDS),
+        "Number of milliseconds a writer will wait to acquire the writelock before giving up."),
+    METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT("hive.metastore.aggregate.stats.cache.max.reader.wait", "1000ms",
+        new TimeValidator(TimeUnit.MILLISECONDS),
+        "Number of milliseconds a reader will wait to acquire the readlock before giving up."),
+    METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL("hive.metastore.aggregate.stats.cache.max.full", (float) 0.9,
+        "Maximum cache full % after which the cache cleaner thread kicks in."),
+    METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL("hive.metastore.aggregate.stats.cache.clean.until", (float) 0.8,
+        "The cleaner thread cleans until cache reaches this % full size."),
+    METASTORE_METRICS("hive.metastore.metrics.enabled", false, "Enable metrics on the metastore."),
+    // Parameters for exporting metadata on table drop (requires the use of the
+    // org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre-event listener)
+    METADATA_EXPORT_LOCATION("hive.metadata.export.location", "",
+        "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
+        "it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" +
+        "metadata being exported to the current user's home directory on HDFS."),
+        "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" +
+        "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" +
+        "alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."),
+    // CLI
+    CLIIGNOREERRORS("hive.cli.errors.ignore", false, ""),
+    CLIPRINTCURRENTDB("hive.cli.print.current.db", false,
+        "Whether to include the current database in the Hive prompt."),
+    CLIPROMPT("hive.cli.prompt", "hive",
+        "Command line prompt configuration value. Other hiveconf can be used in this configuration value. \n" +
+        "Variable substitution will only be invoked at the Hive CLI startup."),
+    CLIPRETTYOUTPUTNUMCOLS("hive.cli.pretty.output.num.cols", -1,
+        "The number of columns to use when formatting output generated by the DESCRIBE PRETTY table_name command.\n" +
+        "If the value of this property is -1, then Hive will use the auto-detected terminal width."),
+    HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""),
+    // Things we log in the jobconf
+    // session identifier
+    HIVESESSIONID("", "", ""),
+    // whether session is running in silent mode or not
+    HIVESESSIONSILENT("hive.session.silent", false, ""),
+    HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false,
+        "Whether to log Hive query, query plan, runtime statistics etc."),
+    HIVEQUERYSTRING("hive.query.string", "",
+        "Query being executed (might be multiple per a session)"),
+    HIVEQUERYID("", "",
+        "ID for query being executed (might be multiple per a session)"),
+    HIVEJOBNAMELENGTH("hive.jobname.length", 50, "max jobname length"),
+    // hive jar
+    HIVEJAR("hive.jar.path", "",
+        "The location of hive_cli.jar that is used when submitting jobs in a separate jvm."),
+    HIVEAUXJARS("hive.aux.jars.path", "",
+        "The location of the plugin jars that contain implementations of user defined functions and serdes."),
+    // reloadable jars
+    HIVERELOADABLEJARS("hive.reloadable.aux.jars.path", "",
+        "Jars can be renewed by executing reload command. And these jars can be "
+            + "used as the auxiliary classes like creating a UDF or SerDe."),
+    // hive added files and jars
+    HIVEADDEDFILES("hive.added.files.path", "", "This an internal parameter."),
+    HIVEADDEDJARS("hive.added.jars.path", "", "This an internal parameter."),
+    HIVEADDEDARCHIVES("hive.added.archives.path", "", "This an internal parameter."),
+    HIVE_CURRENT_DATABASE("hive.current.database", "", "Database name used by current session. Internal usage only.", true),
+    // for hive script operator
+        new TimeValidator(TimeUnit.SECONDS),
+        "How long to run autoprogressor for the script/UDTF operators.\n" +
+        "Set to 0 for forever."),
+        "Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker \n" +
+        "to avoid the task getting killed because of inactivity.  Hive sends progress information when the script is \n" +
+        "outputting to stderr.  This option removes the need of periodically producing stderr messages, \n" +
+        "but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker."),
+        "Name of the environment variable that holds the unique script operator ID in the user's \n" +
+        "transform function (the custom mapper/reducer that the user has specified in the query)"),
+    HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false,
+        "Truncate each environment variable for external script in scripts operator to 20KB (to fit system limits)"),
+    HIVESCRIPT_ENV_BLACKLIST("hive.script.operator.env.blacklist",
+        "hive.txn.valid.txns,hive.script.operator.env.blacklist",
+        "Comma separated list of keys from the configuration file not to convert to environment " +
+        "variables when envoking the script operator"),
+    HIVEMAPREDMODE("hive.mapred.mode", "nonstrict",
+        "The mode in which the Hive operations are being performed. \n" +
+        "In strict mode, some risky queries are not allowed to run. They include:\n" +
+        "  Cartesian Product.\n" +
+        "  No partition being picked up for a query.\n" +
+        "  Comparing bigints and strings.\n" +
+        "  Comparing bigints and doubles.\n" +
+        "  Orderby without limit."),
+    HIVEALIAS("hive.alias", "", ""),
+    HIVEMAPSIDEAGGREGATE("", true, "Whether to use map-side aggregation in Hive Group By queries"),
+    HIVEGROUPBYSKEW("hive.groupby.skewindata", false, "Whether there is skew in data to optimize group by queries"),
+    HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000,
+        "How many rows in the right-most join operand Hive should buffer before emitting the join result."),
+    HIVEJOINCACHESIZE("hive.join.cache.size", 25000,
+        "How many rows in the joining tables (except the streaming table) should be cached in memory."),
+    // CBO related
+    HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."),
+    HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", false, "Flag to control calcite plan to hive operator conversion"),
+    // Description fragments are joined verbatim by '+', so the first fragment must end with a space.
+    HIVE_CBO_EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", false, "Flag to control enabling the extended cost model based on "
+                                 + "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."),
+    HIVE_CBO_COST_MODEL_CPU("hive.cbo.costmodel.cpu", "0.000001", "Default cost of a comparison"),
+    HIVE_CBO_COST_MODEL_NET("", "150.0", "Default cost of a transfering a byte over network;"
+                                                                  + " expressed as multiple of CPU cost"),
+    HIVE_CBO_COST_MODEL_LFS_WRITE("hive.cbo.costmodel.local.fs.write", "4.0", "Default cost of writing a byte to local FS;"
+                                                                             + " expressed as multiple of NETWORK cost"),
+    HIVE_CBO_COST_MODEL_LFS_READ("", "4.0", "Default cost of reading a byte from local FS;"
+                                                                           + " expressed as multiple of NETWORK cost"),
+    HIVE_CBO_COST_MODEL_HDFS_WRITE("hive.cbo.costmodel.hdfs.write", "10.0", "Default cost of writing a byte to HDFS;"
+                                                                 + " expressed as multiple of Local FS write cost"),
+    HIVE_CBO_COST_MODEL_HDFS_READ("", "1.5", "Default cost of reading a byte from HDFS;"
+                                                                 + " expressed as multiple of Local FS read cost"),
+    AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"),
+    // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.rows,
+    // need to remove by Hive 0.13. Also, do not change default (see SMB operator)
+    HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""),
+    HIVEMAPJOINUSEOPTIMIZEDTABLE("hive.mapjoin.optimized.hashtable", true,
+        "Whether Hive should use memory-optimized hash table for MapJoin. Only works on Tez,\n" +
+        "because memory-optimized hashtable cannot be serialized."),
+    HIVEMAPJOINOPTIMIZEDTABLEPROBEPERCENT("hive.mapjoin.optimized.hashtable.probe.percent",
+        (float) 0.5, "Probing space percentage of the optimized hashtable"),
+    // Hybrid grace hash join settings. Each description is built from fragments joined verbatim
+    // by '+', so every fragment that is followed by another word ends with a space.
+    HIVEUSEHYBRIDGRACEHASHJOIN("hive.mapjoin.hybridgrace.hashtable", true, "Whether to use hybrid " +
+        "grace hash join as the join method for mapjoin. Tez only."),
+    HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " +
+        "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " +
+        "This number should be power of 2."),
+    HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace " +
+        "Hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."),
+    HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For " +
+        "Hybrid grace hash join, the minimum number of partitions to create."),
+    HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 8 * 1024 * 1024,
+        "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" +
+        "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" +
+        "joins unnecessary memory will be allocated and then trimmed."),
+    HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000,
+        "How many rows with the same key value should be cached in memory per smb joined table."),
+    HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000,
+        "Number of rows after which size of the grouping keys/aggregation classes is performed"),
+    HIVEMAPAGGRHASHMEMORY("", (float) 0.5,
+        "Portion of total memory to be used by map-side group aggregation hash table"),
+        "Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"),
+        "The max memory to be used by map-side group aggregation hash table.\n" +
+        "If the memory usage is higher than this number, force to flush data"),
+        "Hash aggregation will be turned off if the ratio between hash  table size and input rows is bigger than this number. \n" +
+        "Set to 1 to make sure hash aggregation is never turned off."),
+    HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true,
+        "Whether to optimize multi group by query to generate single M/R  job plan. If the multi group by query has \n" +
+        "common group by keys, it will be optimized to generate single M/R job."),
+    HIVE_MAP_GROUPBY_SORT("", false,
+        "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" +
+        "the group by in the mapper by using BucketizedHiveInputFormat. The only downside to this\n" +
+        "is that it limits the number of mappers to the number of files."),
+        "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" +
+        "the group by in the mapper by using BucketizedHiveInputFormat. If the test mode is set, the plan\n" +
+        "is not converted, but a query property is set to denote the same."),
+    HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false,
+        "Whether to enable using Column Position Alias in Group By or Order By"),
+        "Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.\n" +
+        "For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;\n" +
+        "4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), (null, null, null).\n" +
+        "This can lead to explosion across map-reduce boundary if the cardinality of T is very high,\n" +
+        "and map-side aggregation does not do a very good job. \n" +
+        "\n" +
+        "This parameter decides if Hive should add an additional map-reduce job. If the grouping set\n" +
+        "cardinality (4 in the example above), is more than this value, a new MR job is added under the\n" +
+        "assumption that the original group by will reduce the data size."),
+    // Max file size, in bytes, copied with a single copy (after that, distcp is used).
+    // The default below is 32 * 1024 * 1024 bytes, i.e. 32 MB.
+    HIVE_EXEC_COPYFILE_MAXSIZE("hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/,
+        "Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories. " +
+        "Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."),
+    // for hive udtf operator
+        "Whether Hive should automatically send progress information to TaskTracker \n" +
+        "when using UDTF's to prevent the task getting killed because of inactivity.  Users should be cautious \n" +
+        "because this may prevent TaskTracker from killing tasks with infinite loops."),
+    HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC"),
+        "Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"),
+    HIVEDEFAULTMANAGEDFILEFORMAT("hive.default.fileformat.managed", "none",
+  new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"),
+  "Default file format for CREATE TABLE statement applied to managed tables only. External tables will be \n" +
+  "created with format specified by hive.default.fileformat. Leaving this null will result in using hive.default.fileformat \n" +
+  "for all tables."),
+    HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile"),
+        "Default file format for storing result of the query."),
+    HIVECHECKFILEFORMAT("hive.fileformat.check", true, "Whether to check file format or not when loading data files"),
+    // default serde for rcfile
+    HIVEDEFAULTRCFILESERDE("hive.default.rcfile.serde",
+        "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe",
+        "The default SerDe Hive will use for the RCFile format"),
+    HIVEDEFAULTSERDE("hive.default.serde",
+        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
+        "The default SerDe Hive will use for storage formats that do not specify a SerDe."),
+    SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema",
+        "," +
+        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," +
+        "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," +
+        "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," +
+        "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," +
+        "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," +
+        "," +
+        "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe",
+        "SerDes retrieving schema from metastore. This is an internal parameter."),
+    HIVEHISTORYFILELOC("hive.querylog.location",
+        "${}" + File.separator + "${}",
+        "Location of Hive run time structured log file"),
+    HIVE_LOG_INCREMENTAL_PLAN_PROGRESS("hive.querylog.enable.plan.progress", true,
+        "Whether to log the plan's progress every time a job's progress is checked.\n" +
+        "These logs are written to the location specified by hive.querylog.location"),
+    HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL("hive.querylog.plan.progress.interval", "60000ms",
+        new TimeValidator(TimeUnit.MILLISECONDS),
+        "The interval to wait between logging the plan's progress.\n" +
+        "If there is a whole number percentage change in the progress of the mappers or the reducers,\n" +
+        "the progress is logged regardless of this value.\n" +
+        "The actual interval will be the ceiling of (this value divided by the value of\n" +
+        "hive.exec.counters.pull.interval) multiplied by the value of hive.exec.counters.pull.interval\n" +
+        "I.e. if it is not divide evenly by the value of hive.exec.counters.pull.interval it will be\n" +
+        "logged less frequently than specified.\n" +
+        "This only has an effect if hive.querylog.enable.plan.progress is set to true."),
+    HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
+        "The default SerDe for transmitting input data to and reading output data from the user scripts. "),
+    HIVESCRIPTRECORDREADER("hive.script.recordreader",
+        "org.apache.hadoop.hive.ql.exec.TextRecordReader",
+        "The default record reader for reading data from the user scripts. "),
+    HIVESCRIPTRECORDWRITER("hive.script.recordwriter",
+        "org.apache.hadoop.hive.ql.exec.TextRecordWriter",
+        "The default record writer for writing data to the user scripts. "),
+    HIVESCRIPTESCAPE("hive.transform.escape.input", false,
+        "This adds an option to escape special chars (newlines, carriage returns and\n" +
+        "tabs) when they are passed to the user script. This is useful if the Hive tables\n" +
+        "can contain data that contains special characters."),
+    HIVEBINARYRECORDMAX("hive.binary.record.max.length", 1000,
+        "Read from a binary stream and treat each hive.binary.record.max.length bytes as a record. \n" +
+        "The last record before the end of stream can have less than hive.binary.record.max.length bytes"),
+    // HWI
+    HIVEHWILISTENHOST("", "", "This is the host address the Hive Web Interface will listen on"),
+    HIVEHWILISTENPORT("hive.hwi.listen.port", "9999", "This is the port the Hive Web Interface will listen on"),
+    HIVEHWIWARFILE("hive.hwi.war.file", "${env:HWI_WAR_FILE}",
+        "This sets the path to the HWI war file, relative to ${HIVE_HOME}. "),
+    HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0, "mapper/reducer memory in local mode"),
+    //small table file size
+    HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize", 25000000L,
+        "The threshold for the input file size of the small tables; if the file size is smaller \n" +
+        "than this threshold, it will try to convert the common join into map join"),
+    HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0,
+        "A number used to percentage sampling. By changing this number, user will change the subsets of data sampled."),
+    // test mode in hive mode
+    HIVETESTMODE("hive.test.mode", false,
+        "Whether Hive is running in test mode. If yes, it turns on sampling and prefixes the output tablename.",
+        false),
+    HIVETESTMODEPREFIX("hive.test.mode.prefix", "test_",
+        "In test mode, specfies prefixes for the output table", false),
+    HIVETESTMODESAMPLEFREQ("hive.test.mode.samplefreq", 32,
+        "In test mode, specfies sampling frequency for table, which is not bucketed,\n" +
+        "For example, the following query:\n" +
+        "  INSERT OVERWRITE TABLE dest SELECT col1 from src\n" +
+        "would be converted to\n" +
+        "  INSERT OVERWRITE TABLE test_dest\n" +
+        "  SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1))", false),
+    HIVETESTMODENOSAMPLE("hive.test.mode.nosamplelist", "",
+        "In test mode, specifies comma separated table names which would not apply sampling", false),
+    HIVETESTMODEDUMMYSTATAGGR("hive.test.dummystats.aggregator", "", "internal variable for test", false),
+    HIVETESTMODEDUMMYSTATPUB("hive.test.dummystats.publisher", "", "internal variable for test", false),
+    HIVETESTCURRENTTIMESTAMP("hive.test.currenttimestamp", null, "current timestamp for test", false),
+    HIVEMERGEMAPFILES("hive.merge.mapfiles", true,
+        "Merge small files at the end of a map-only job"),
+    HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false,
+        "Merge small files at the end of a map-reduce job"),
+    HIVEMERGETEZFILES("hive.merge.tezfiles", false, "Merge small files at the end of a Tez DAG"),
+    HIVEMERGESPARKFILES("hive.merge.sparkfiles", false, "Merge small files at the end of a Spark DAG Transformation"),
+    HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000),
+        "Size of merged files at the end of the job"),
+    HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000),
+        "When the average output file size of a job is less than this number, Hive will start an additional \n" +
+        "map-reduce job to merge the output files into bigger files. This is only done for map-only jobs \n" +
+        "if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true."),
+    HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true, ""),
+    HIVEMERGEORCFILESTRIPELEVEL("hive.merge.orcfile.stripe.level", true,
+        "When hive.merge.mapfiles, hive.merge.mapredfiles or hive.merge.tezfiles is enabled\n" +
+        "while writing a table with ORC file format, enabling this config will do stripe-level\n" +
+        "fast merge for small ORC files. Note that enabling this config will not honor the\n" +
+        "padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
+    HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
+        "If this is set the header for RCFiles will simply be RCF.  If this is not\n" +
+        "set the header will be that borrowed from sequence files, e.g. SEQ- followed\n" +
+        "by the input and output RCFile formats."),
+    HIVEUSERCFILESYNCCACHE("hive.exec.rcfile.use.sync.cache", true, ""),
+    HIVE_RCFILE_RECORD_BUFFER_SIZE("hive.io.rcfile.record.buffer.size", 4194304, ""),   // 4M (4 * 1024 * 1024)
+    PARQUET_MEMORY_POOL_RATIO("parquet.memory.pool.ratio", 0.5f,
+        "Maximum fraction of heap that can be used by Parquet file writers in one task.\n" +
+        "It is for avoiding OutOfMemory error in tasks. Work with Parquet 1.6.0 and above.\n" +
+        "This config parameter is defined in Parquet, so that it does not start with 'hive.'."),
+    // Enabled by default. The description fragments are concatenated verbatim, so each
+    // fragment has to carry its own trailing separator.
+    HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION("hive.parquet.timestamp.skip.conversion", true,
+      "Current Hive implementation of parquet stores timestamps to UTC, this flag allows skipping of the conversion " +
+      "on reading parquet files from other tools"),
+        "Boolean/tinyint/smallint/int/bigint value is interpreted as milliseconds during the timestamp conversion.\n" +
+        "Set this flag to true to interpret the value as seconds to be consistent with float/double." ),
+    HIVE_ORC_FILE_MEMORY_POOL("hive.exec.orc.memory.pool", 0.5f,
+        "Maximum fraction of heap that can be used by ORC file writers"),
+    HIVE_ORC_WRITE_FORMAT("hive.exec.orc.write.format", null,
+        "Define the version of the file to write. Possible values are 0.11 and 0.12.\n" +
+        "If this parameter is not defined, ORC will use the run length encoding (RLE)\n" +
+        "introduced in Hive 0.12. Any value other than 0.11 results in the 0.12 encoding."),
+    HIVE_ORC_DEFAULT_STRIPE_SIZE("hive.exec.orc.default.stripe.size",
+        64L * 1024 * 1024,
+        "Define the default ORC stripe size, in bytes."),
+    HIVE_ORC_DEFAULT_BLOCK_SIZE("hive.exec.orc.default.block.size", 256L * 1024 * 1024,
+        "Define the default file system block size for ORC files."),
+    HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f,
+        "If the number of keys in a dictionary is greater than this fraction of the total number of\n" +
+        "non-null rows, turn off dictionary encoding.  Use 1 to always use dictionary encoding."),
+    HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride", 10000,
+        "Define the default ORC index stride in number of rows. (Stride is the number of rows\n" +
+        "an index entry represents.)"),
+    HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK("hive.orc.row.index.stride.dictionary.check", true,
+        "If enabled dictionary check will happen after first row index stride (default 10000 rows)\n" +
+        "else dictionary check will happen before writing first stripe. In both cases, the decision\n" +
+        "to use dictionary or not will be retained thereafter."),
+    HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024,
+        "Define the default ORC buffer size, in bytes."),
+    HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true,
+        "Define the default block padding, which pads stripes to the HDFS block boundaries."),
+    HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f,
+        "Define the tolerance for block padding as a decimal fraction of stripe size (for\n" +
+        "example, the default value 0.05 is 5% of the stripe size). For the defaults of 64Mb\n" +
+        "ORC stripe and 256Mb HDFS blocks, the default block padding tolerance of 5% will\n" +
+        "reserve a maximum of 3.2Mb for padding within the 256Mb block. In that case, if the\n" +
+        "available size within the block is more than 3.2Mb, a new smaller stripe will be\n" +
+        "inserted to fit within that space. This will make sure that no stripe written will\n" +
+        "cross block boundaries and cause remote reads within a node local task."),
+    HIVE_ORC_DEFAULT_COMPRESS("hive.exec.orc.default.compress", "ZLIB", "Define the default compression codec for ORC file"),
+    HIVE_ORC_ENCODING_STRATEGY("hive.exec.orc.encoding.strategy", "SPEED", new StringSet("SPEED", "COMPRESSION"),
+        "Define the encoding strategy to use while writing data. Changing this will\n" +
+        "only affect the light weight encoding for integers. This flag will not\n" +
+        "change the compression level of higher level compression codec (like ZLIB)."),
+    HIVE_ORC_COMPRESSION_STRATEGY("hive.exec.orc.compression.strategy", "SPEED", new StringSet("SPEED", "COMPRESSION"),
+         "Define the compression strategy to use while writing data. \n" +
+         "This changes the compression level of higher level compression codec (like ZLIB)."),
+    HIVE_ORC_SPLIT_STRATEGY("hive.exec.orc.split.strategy", "HYBRID", new StringSet("HYBRID", "BI", "ETL"),
+        "This is not a user level config. BI strategy is used when the requirement is to spend less time in split generation" +
+        " as opposed to query execution (split generation does not read or cache file footers)." +
+        " ETL strategy is used when spending little more time in split generation is acceptable" +
+        " (split generation reads and caches file footers). HYBRID chooses between the above strategies" +
+        " based on heuristics."),
+        "Whether to enable using file metadata cache in metastore for ORC file footers."),
+    HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false,
+        "If turned on splits generated by orc will include metadata about the stripes in the file. This\n" +
+        "data is read remotely (from the client or HS2 machine) and sent to all the tasks."),
+    // Enabled by default.
+    HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS("hive.orc.splits.include.fileid", true,
+        "Include file ID in splits on file systems that support it."),
+    HIVE_ORC_CACHE_STRIPE_DETAILS_SIZE("hive.orc.cache.stripe.details.size", 10000,
+        "Max cache size for keeping meta info about orc splits cached in the client."),
+    HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10,
+        "How many threads orc should use to create splits in parallel."),
+        "If ORC reader encounters corrupt data, this value will be used to determine\n" +
+        "whether to skip the corrupt data or throw exception. The default behavior is to throw exception."),
+    HIVE_ORC_ZEROCOPY("hive.exec.orc.zerocopy", false,
+        "Use zerocopy reads with ORC. (This requires Hadoop 2.3 or later.)"),
+    // Off by default: only 'TRUE' and 'FALSE' are accepted unless this is enabled.
+    HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false,
+        "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" +
+        "'1', and '0' as extended, legal boolean literal, in addition to 'TRUE' and 'FALSE'.\n" +
+        "The default is false, which means only 'TRUE' and 'FALSE' are treated as legal\n" +
+        "boolean literal."),
+    HIVESKEWJOIN("hive.optimize.skewjoin", false,
+        "Whether to enable skew join optimization. \n" +
+        "The algorithm is as follows: At runtime, detect the keys with a large skew. Instead of\n" +
+        "processing those keys, store them temporarily in an HDFS directory. In a follow-up map-reduce\n" +
+        "job, process those skewed keys. The same key need not be skewed for all the tables, and so,\n" +
+        "the follow-up map-reduce job (for the skewed keys) would be much faster, since it would be a\n" +
+        "map-join."),
+    // Off by default; per its description it also depends on hive.auto.convert.join being enabled.
+    HIVEDYNAMICPARTITIONHASHJOIN("hive.optimize.dynamic.partition.hashjoin", false,
+        "Whether to enable dynamically partitioned hash join optimization. \n" +
+        "This setting is also dependent on enabling hive.auto.convert.join"),
+    // Enabled by default.
+    HIVECONVERTJOIN("hive.auto.convert.join", true,
+        "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size"),
+        "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. \n" +
+        "If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the\n" +
+        "specified size, the join is directly converted to a mapjoin (there is no conditional task)."),
+        10000000L,
+        "If is off, this parameter does not take affect. \n" +
+        "However, if it is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than this size, \n" +
+        "the join is directly converted to a mapjoin(there is no conditional task). The default is 10MB"),
+        "For conditional joins, if input stream from a small alias can be directly applied to join operator without \n" +
+        "filtering or projection, the alias need not to be pre-staged in distributed cache via mapred local task.\n" +
+        "Currently, this is not working with vectorization or tez execution engine."),
+    HIVESKEWJOINKEY("hive.skewjoin.key", 100000,
+        "Determine if we get a skew key in join. If we see more than the specified number of rows with the same key in join operator,\n" +
+        "we think the key as a skew join key. "),
+        "Determine the number of map task used in the follow up map join job for a skew join.\n" +
+        "It should be used together with hive.skewjoin.mapjoin.min.split to perform a fine grained control."),
+    // Default is 33554432 (32 * 1024 * 1024), passed as a long.
+    HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L,
+        "Determine the number of map task at most used in the follow up map join job for a skew join by specifying \n" +
+        "the minimum split size. It should be used together with hive.skewjoin.mapjoin.map.tasks to perform a fine grained control."),
+    HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000,
+        "Send a heartbeat after this interval - used by mapjoin and filter operators"),
+    HIVELIMITMAXROWSIZE("hive.limit.row.max.size", 100000L,
+        "When trying a smaller subset of data for simple LIMIT, how much size we need to guarantee each row to have at least."),
+    HIVELIMITOPTLIMITFILE("hive.limit.optimize.limit.file", 10,
+        "When trying a smaller subset of data for simple LIMIT, maximum number of files we can sample."),
+    // Off by default.
+    HIVELIMITOPTENABLE("hive.limit.optimize.enable", false,
+        "Whether to enable the optimization of trying a smaller subset of data for simple LIMIT first."),
+    HIVELIMITOPTMAXFETCH("hive.limit.optimize.fetch.max", 50000,
+        "Maximum number of rows allowed for a smaller subset of data for simple LIMIT, if it is a fetch query. \n" +
+        "Insert queries are not restricted by this limit."),
+    HIVELIMITPUSHDOWNMEMORYUSAGE("hive.limit.pushdown.memory.usage", -1f,
+        "The max memory to be used for hash in RS operator for top K selection."),
+    HIVELIMITTABLESCANPARTITION("hive.limit.query.max.table.partition", -1,
+        "This controls how many partitions can be scanned for each partitioned table.\n" +
+        "The default value \"-1\" means no limit."),
+    // Default 1.0f. The description is built from adjacent literals, so every continuation
+    // fragment starts with the space that separates it from the previous one.
+    HIVEHASHTABLEKEYCOUNTADJUSTMENT("hive.hashtable.key.count.adjustment", 1.0f,
+        "Adjustment to mapjoin hashtable size derived from table and column statistics; the estimate" +
+        " of the number of keys is divided by this value. If the value is 0, statistics are not used" +
+        " and hive.hashtable.initialCapacity is used instead."),
+    // Default 100000. The description points at the key-count adjustment property by the
+    // same name it is declared under (hive.hashtable.key.count.adjustment).
+    HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000, "Initial capacity of " +
+        "mapjoin hashtable if statistics are absent, or if hive.hashtable.key.count.adjustment is set to 0"),
+    // Default 0.75 as a float; no description is supplied.
+    HIVEHASHTABLELOADFACTOR("hive.hashtable.loadfactor", 0.75f, ""),
+    // Local-task memory limit when a group by follows the map join; default 0.55 as a float.
+    HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE(
+        "hive.mapjoin.followby.gby.localtask.max.memory.usage",
+        0.55f,
+        "This number means how much memory the local task can take to hold the key/value "
+            + "into an in-memory hash table \n"
+            + "when this map join is followed by a group by. "
+            + "If the local task's memory usage is more than this number, \n"
+            + "the local task will abort by itself. "
+            + "It means the data of the small table is too large to be held in memory."),
+    // Local-task memory limit for the in-memory hash table; default 0.90 as a float.
+    HIVEHASHTABLEMAXMEMORYUSAGE(
+        "hive.mapjoin.localtask.max.memory.usage",
+        0.90f,
+        "This number means how much memory the local task can take to hold the key/value "
+            + "into an in-memory hash table. \n"
+            + "If the local task's memory usage is more than this number, "
+            + "the local task will abort by itself. \n"
+            + "It means the data of the small table is too large to be held in memory."),
+    // Row interval between memory-usage checks; default 100000, passed as a long.
+    HIVEHASHTABLESCALE(
+        "hive.mapjoin.check.memory.rows",
+        100000L,
+        "The number means after how many rows processed it needs to check the memory usage"),
+    // Off by default; no description is supplied.
+    HIVEDEBUGLOCALTASK(
+        "hive.debug.localtask",
+        false,
+        ""),
+    // Defaults to CombineHiveInputFormat, the class the description says to move away from
+    // when it causes problems.
+    HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat",
+        "The default input format. Set this to HiveInputFormat if you encounter problems with CombineHiveInputFormat."),
+    // NOTE(review): default class name restored from the upstream Hive configuration
+    // reference; confirm against the original commit.
+    HIVETEZINPUTFORMAT("hive.tez.input.format", "org.apache.hadoop.hive.ql.io.HiveInputFormat",
+        "The default input format for tez. Tez groups splits in the AM."),
+    HIVETEZCONTAINERSIZE("hive.tez.container.size", -1,
+        "By default Tez will spawn containers of the size of a mapper. This can be used to overwrite."),
+    HIVETEZCPUVCORES("hive.tez.cpu.vcores", -1,
+        "By default Tez will ask for however many cpus map-reduce is configured to use per container.\n" +
+        "This can be used to overwrite."),
+    // No default (null); per the description, the Java options of map tasks apply unless this is set.
+    HIVETEZJAVAOPTS("hive.tez.java.opts", null,
+        "By default Tez will use the Java options from map tasks. This can be used to overwrite."),
+    // Default "INFO".
+    HIVETEZLOGLEVEL("hive.tez.log.level", "INFO",
+        "The log level to use for tasks executing as part of the DAG.\n" +
+        "Used only if hive.tez.java.opts is used to configure Java options."),
+    HIVEENFORCEBUCKETING("hive.enforce.bucketing", false,
+        "Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced."),
+    HIVEENFORCESORTING("hive.enforce.sorting", false,
+        "Whether sorting is enforced. If true, while inserting into the table, sorting is enforced."),
+    HIVEOPTIMIZEBUCKETINGSORTING("hive.optimize.bucketingsorting", true,
+        "If hive.enforce.bucketing or hive.enforce.sorting is true, don't create a reducer for enforcing \n" +
+        "bucketing/sorting for queries of the form: \n" +
+        "insert overwrite table T2 select * from T1;\n" +
+        "where T1 and T2 are bucketed/sorted by the same keys into the same number of buckets."),
+    // NOTE(review): default class name restored from the upstream Hive configuration
+    // reference; confirm against the original commit. No description is supplied.
+    HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner", ""),
+    HIVEENFORCESORTMERGEBUCKETMAPJOIN("hive.enforce.sortmergebucketmapjoin", false,
+        "If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not ?"),
+    HIVEENFORCEBUCKETMAPJOIN("hive.enforce.bucketmapjoin", false,
+        "If the user asked for bucketed map-side join, and it cannot be performed, \n" +
+        "should the query fail or not ? For example, if the buckets in the tables being joined are\n" +
+        "not a multiple of each other, bucketed map-side join cannot be performed, and the\n" +
+        "query will fail if hive.enforce.bucketmapjoin is set to true."),
+        "Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join."),
+        "",
+        "org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ",
+        "The policy to choose the big table for automatic conversion to sort-merge join. \n" +
+        "By default, the table with the largest partitions is assigned the big table. All policies are:\n" +
+        ". based on position of the table - the leftmost table is selected\n" +
+        "org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.\n" +
+        ". based on total size (all the partitions selected in the query) of the table \n" +
+        "org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.\


View raw message