carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jackylk <...@git.apache.org>
Subject [GitHub] incubator-carbondata pull request #362: [CARBONDATA-459] Block distribution ...
Date Tue, 29 Nov 2016 15:28:50 GMT
Github user jackylk commented on a diff in the pull request:

    https://github.com/apache/incubator-carbondata/pull/362#discussion_r90031916
  
    --- Diff: integration/spark/src/main/scala/org/apache/spark/sql/hive/DistributionUtil.scala ---
    @@ -101,47 +101,107 @@ object DistributionUtil {
        * Checking if the existing executors is greater than configured executors, if yes
        * returning configured executors.
        *
    -   * @param blockList
    +   * @param blockList total number of blocks in the identified segments
        * @param sparkContext
        * @return
        */
       def ensureExecutorsAndGetNodeList(blockList: Seq[Distributable],
         sparkContext: SparkContext): Seq[String] = {
         val nodeMapping = CarbonLoaderUtil.getRequiredExecutors(blockList.asJava)
    -    var confExecutorsTemp: String = null
    -    if (sparkContext.getConf.contains("spark.executor.instances")) {
    -      confExecutorsTemp = sparkContext.getConf.get("spark.executor.instances")
    -    } else if (sparkContext.getConf.contains("spark.dynamicAllocation.enabled")
    -               && sparkContext.getConf.get("spark.dynamicAllocation.enabled").trim
    -                 .equalsIgnoreCase("true")) {
    -      if (sparkContext.getConf.contains("spark.dynamicAllocation.maxExecutors")) {
    -        confExecutorsTemp = sparkContext.getConf.get("spark.dynamicAllocation.maxExecutors")
    +    ensureExecutorsByNumberAndGetNodeList(nodeMapping, blockList, sparkContext)
    +  }
    +
    +  /**
    +   * This method will ensure that the required/configured number of executors are requested
    +   * for processing the identified blocks
    +   *
    +   * @param nodeMapping
    +   * @param blockList
    +   * @param sparkContext
    +   * @return
    +   */
    +  private def ensureExecutorsByNumberAndGetNodeList(nodeMapping: java.util.Map[String,
java.util
    +  .List[Distributable]], blockList: Seq[Distributable],
    +      sparkContext: SparkContext): Seq[String] = {
    +    val nodesOfData = nodeMapping.size()
    +    val confExecutors: Int = getConfiguredExecutors(sparkContext)
    +    LOGGER.info("Executors configured : " + confExecutors)
    +    val requiredExecutors = if (nodesOfData < 1 || nodesOfData > confExecutors)
{
    +      confExecutors
    +    } else if (confExecutors > nodesOfData) {
    +      // this case will come only if dynamic allocation is true
    +      var totalExecutorsToBeRequested = nodesOfData
    +      // If total number of blocks are greater than the nodes identified then ensure
    +      // that the configured number of max executors can be opened based on the difference
of
    +      // block list size and nodes identified
    +      if (blockList.size > nodesOfData) {
    +        // e.g 1. blockList size = 40, nodesOfData = 3, confExecutors = 6, then all executors
    +        // need to be opened
    +        // 2. blockList size = 4, nodesOfData = 3, confExecutors = 6, then
    +        // total 4 executors need to be opened
    +        val extraExecutorsToBeRequested = blockList.size - nodesOfData
    +        if (extraExecutorsToBeRequested > confExecutors) {
    +          totalExecutorsToBeRequested = confExecutors
    +        } else {
    +          totalExecutorsToBeRequested = nodesOfData + extraExecutorsToBeRequested
    +        }
           }
    +      LOGGER.info("Total executors requested: " + totalExecutorsToBeRequested)
    +      totalExecutorsToBeRequested
    +    } else {
    +      nodesOfData
         }
     
    -    val confExecutors = if (null != confExecutorsTemp) {
    -      confExecutorsTemp.toInt
    +    val startTime = System.currentTimeMillis();
    +    if (sparkContext.getConf.getBoolean("spark.dynamicAllocation.enabled", false)) {
    +      // this case will come only if dynamic allocation is true
    +      CarbonContext
    +        .ensureExecutors(sparkContext, requiredExecutors, blockList.size, Map.empty)
         } else {
    -      1
    +      CarbonContext.ensureExecutors(sparkContext, requiredExecutors)
         }
    -    val requiredExecutors = if (nodeMapping.size > confExecutors) {
    -      confExecutors
    +    getDistinctNodesList(sparkContext, requiredExecutors, startTime)
    +  }
    +
    +  /**
    +   * This method will return the configured executors
    +   *
    +   * @param sparkContext
    +   * @return
    +   */
    +  private def getConfiguredExecutors(sparkContext: SparkContext): Int = {
    +    var confExecutors: Int = 0
    +    if (sparkContext.getConf.getBoolean("spark.dynamicAllocation.enabled", false)) {
    +      // default value for spark.dynamicAllocation.maxExecutors is infinity
    +      confExecutors = sparkContext.getConf.getInt("spark.dynamicAllocation.maxExecutors",
1)
         } else {
    -      nodeMapping.size()
    +      // default value for spark.executor.instances is 2
    +      confExecutors = sparkContext.getConf.getInt("spark.executor.instances", 1)
         }
    +    confExecutors
    +  }
     
    -    val startTime = System.currentTimeMillis()
    -    CarbonContext.ensureExecutors(sparkContext, requiredExecutors)
    +  /**
    +   * This method will return the distinct nodes list
    +   *
    +   * @param sparkContext
    +   * @param requiredExecutors
    +   * @param startTime
    +   * @return
    +   */
    +  private def getDistinctNodesList(sparkContext: SparkContext,
    +      requiredExecutors: Int,
    +      startTime: Long): Seq[String] = {
         var nodes = DistributionUtil.getNodeList(sparkContext)
    -    var maxTimes = 30
    +    var maxTimes = 10;
         while (nodes.length < requiredExecutors && maxTimes > 0) {
    -      Thread.sleep(500)
    +      Thread.sleep(500);
           nodes = DistributionUtil.getNodeList(sparkContext)
    -      maxTimes = maxTimes - 1
    +      maxTimes = maxTimes - 1;
         }
    -    val timDiff = System.currentTimeMillis() - startTime
    -    LOGGER.info(s"Total Time taken to ensure the required executors: $timDiff")
    -    LOGGER.info(s"Time elapsed to allocate the required executors: ${ (30 - maxTimes)
* 500 }")
    -    nodes.distinct
    +    val timDiff = System.currentTimeMillis() - startTime;
    +    LOGGER.info("Total Time taken to ensure the required executors : " + timDiff)
    --- End diff --
    
    I don't think this line needs to change; please keep using the s"" string-interpolation style.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message