carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject [1/2] incubator-carbondata git commit: Only return 1 preferred loc to confirm each node run 1 task
Date Thu, 30 Mar 2017 05:35:43 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 32bb7fef1 -> 5d2ae6be2


Only return 1 preferred loc to confirm each node run 1 task

Add comments

Remove unused code

Remove unused import

Fix style for mkString output


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/cbca5d38
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/cbca5d38
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/cbca5d38

Branch: refs/heads/master
Commit: cbca5d38e6c58f6400d2fd5db8e6f0e8b458cc1f
Parents: 32bb7fe
Author: l00251599 <l00251599@huaweiobz.com>
Authored: Wed Mar 29 17:07:50 2017 +0800
Committer: jackylk <jacky.likun@huawei.com>
Committed: Thu Mar 30 11:01:23 2017 +0530

----------------------------------------------------------------------
 .../spark/rdd/NewCarbonDataLoadRDD.scala        | 32 ++++++--------------
 1 file changed, 9 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/cbca5d38/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
index 50894d4..0690ba1 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
@@ -20,7 +20,6 @@ package org.apache.carbondata.spark.rdd
 import java.io.{IOException, ObjectInputStream, ObjectOutputStream}
 import java.nio.ByteBuffer
 import java.text.SimpleDateFormat
-import java.util
 import java.util.{Date, UUID}
 
 import scala.collection.JavaConverters._
@@ -352,28 +351,15 @@ class NewCarbonDataLoadRDD[K, V](
     } else {
       val theSplit = split.asInstanceOf[CarbonNodePartition]
       val firstOptionLocation: Seq[String] = List(theSplit.serializableHadoopSplit)
-      logInfo("Preferred Location for split : " + firstOptionLocation.head)
-      val blockMap = new util.LinkedHashMap[String, Integer]()
-      val tableBlocks = theSplit.blocksDetails
-      tableBlocks.foreach { tableBlock =>
-        tableBlock.getLocations.foreach { location =>
-          if (!firstOptionLocation.exists(location.equalsIgnoreCase(_))) {
-            val currentCount = blockMap.get(location)
-            if (currentCount == null) {
-              blockMap.put(location, 1)
-            } else {
-              blockMap.put(location, currentCount + 1)
-            }
-          }
-        }
-      }
-
-      val sortedList = blockMap.entrySet().asScala.toSeq.sortWith { (nodeCount1, nodeCount2) =>
-        nodeCount1.getValue > nodeCount2.getValue
-      }
-
-      val sortedNodesList = sortedList.map(nodeCount => nodeCount.getKey).take(2)
-      firstOptionLocation ++ sortedNodesList
+      logInfo("Preferred Location for split : " + firstOptionLocation.mkString(","))
+      /**
+       * At original logic, we were adding the next preferred location so that in case of the
+       * failure the Spark should know where to schedule the failed task.
+       * Remove the next preferred location is because some time Spark will pick the same node
+       * for 2 tasks, so one node is getting over loaded with the task and one have no task to
+       * do. And impacting the performance despite of any failure.
+       */
+      firstOptionLocation
     }
   }
 }


Mime
View raw message