hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chinnar...@apache.org
Subject hive git commit: HIVE-13217 : Replication for HoS mapjoin small file needs to respect dfs.replication.max (Chinna Rao L, via Szehon Ho)
Date Thu, 24 Mar 2016 08:35:23 GMT
Repository: hive
Updated Branches:
  refs/heads/master d8705a12f -> 55383d815


HIVE-13217 : Replication for HoS mapjoin small file needs to respect dfs.replication.max (Chinna
Rao L, via Szehon Ho)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55383d81
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55383d81
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55383d81

Branch: refs/heads/master
Commit: 55383d815420b9d1e8ad364dd2efa8ac894abcb5
Parents: d8705a1
Author: Chinna Rao L <chinnaraol@apache.org>
Authored: Thu Mar 24 14:03:16 2016 +0530
Committer: Chinna Rao L <chinnaraol@apache.org>
Committed: Thu Mar 24 14:03:16 2016 +0530

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/SparkHashTableSinkOperator.java      | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/55383d81/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
index 85344fc..5837614 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SparkHashTableSinkOperator.java
@@ -47,11 +47,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
 public class SparkHashTableSinkOperator
     extends TerminalOperator<SparkHashTableSinkDesc> implements Serializable {
-  private static final int MIN_REPLICATION = 10;
   private static final long serialVersionUID = 1L;
   private final String CLASS_NAME = this.getClass().getName();
   private final PerfLogger perfLogger = SessionState.getPerfLogger();
   protected static final Logger LOG = LoggerFactory.getLogger(SparkHashTableSinkOperator.class.getName());
+  public static final String DFS_REPLICATION_MAX = "dfs.replication.max";
+  private int minReplication = 10;
 
   private final HashTableSinkOperator htsOperator;
 
@@ -73,6 +74,9 @@ public class SparkHashTableSinkOperator
     byte tag = conf.getTag();
     inputOIs[tag] = inputObjInspectors[0];
     conf.setTagOrder(new Byte[]{ tag });
+    int dfsMaxReplication = hconf.getInt(DFS_REPLICATION_MAX, minReplication);
+    // minReplication value should not cross the value of dfs.replication.max
+    minReplication = Math.min(minReplication, dfsMaxReplication);
     htsOperator.setConf(conf);
     htsOperator.initialize(hconf, inputOIs);
   }
@@ -151,7 +155,7 @@ public class SparkHashTableSinkOperator
     }
     // TODO find out numOfPartitions for the big table
     int numOfPartitions = replication;
-    replication = (short) Math.max(MIN_REPLICATION, numOfPartitions);
+    replication = (short) Math.max(minReplication, numOfPartitions);
     htsOperator.console.printInfo(Utilities.now() + "\tDump the side-table for tag: " + tag
       + " with group count: " + tableContainer.size() + " into file: " + path);
     // get the hashtable file and path


Mime
View raw message