carbondata-commits mailing list archives

From chenliang...@apache.org
Subject [1/2] incubator-carbondata git commit: Use static string to set Hadoop configuration
Date Sat, 03 Sep 2016 13:08:08 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 79c423484 -> 9c2d70339


Use static string to set Hadoop configuration
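
In short, the patch swaps hardcoded Hadoop configuration keys for the static String
constants that org.apache.hadoop.mapreduce.lib.input.FileInputFormat already defines.
A minimal standalone sketch of the pattern (the object name and paths are made up for
illustration; the FileInputFormat constants are real Hadoop fields):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

    object StaticKeyExample {
      def main(args: Array[String]): Unit = {
        val conf = new Configuration()
        // Before: a raw string literal, invisible to the compiler and refactoring tools.
        conf.set("mapreduce.input.fileinputformat.inputdir", "/tmp/fact")
        // After: the constants expand to the same keys, so a misspelled key name
        // becomes a compile error instead of a silently ignored setting.
        conf.set(FileInputFormat.INPUT_DIR, "/tmp/fact")
        conf.set(FileInputFormat.INPUT_DIR_RECURSIVE, "true")
        conf.set(FileInputFormat.SPLIT_MAXSIZE, (16L * 1024 * 1024).toString)
        // Both spellings write the same entry, so the change is behavior-preserving.
        println(conf.get(FileInputFormat.INPUT_DIR)) // prints /tmp/fact
      }
    }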


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/b31152db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/b31152db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/b31152db

Branch: refs/heads/master
Commit: b31152db18535cd8b162f9894926216ff2e0588d
Parents: 79c4234
Author: Zuo Wang <wangzuo.nj@qq.com>
Authored: Fri Sep 2 17:01:55 2016 +0800
Committer: chenliang613 <chenliang613@apache.org>
Committed: Sat Sep 3 21:06:12 2016 +0800

----------------------------------------------------------------------
 .../apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala  | 9 ++++-----
 .../src/main/scala/org/apache/spark/util/SplitUtils.scala   | 6 +++---
 2 files changed, 7 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b31152db/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
b/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
index 0d0ac97..8f4bd06 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
@@ -28,7 +28,7 @@ import scala.util.control.Breaks._
 
 import org.apache.hadoop.conf.{Configurable, Configuration}
 import org.apache.hadoop.mapreduce.Job
-import org.apache.hadoop.mapreduce.lib.input.FileSplit
+import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
 import org.apache.spark.{util => _, _}
 import org.apache.spark.sql.{CarbonEnv, SQLContext}
 import org.apache.spark.sql.execution.command.{AlterTableModel, CompactionCallableModel, CompactionModel, Partitioner}
@@ -219,8 +219,7 @@ object CarbonDataRDDFactory extends Logging {
       if (newSplitSize < CarbonCommonConstants.CARBON_16MB) {
         newSplitSize = CarbonCommonConstants.CARBON_16MB
       }
-      hadoopConfiguration.set(
-        "mapreduce.input.fileinputformat.split.maxsize", newSplitSize.toString)
+      hadoopConfiguration.set(FileInputFormat.SPLIT_MAXSIZE, newSplitSize.toString)
       logInfo("totalInputSpaceConsumed : " + spaceConsumed +
         " , defaultParallelism : " + defaultParallelism)
       logInfo("mapreduce.input.fileinputformat.split.maxsize : " + newSplitSize.toString)
@@ -886,8 +885,8 @@ object CarbonDataRDDFactory extends Logging {
           val hadoopConfiguration = new Configuration(sqlContext.sparkContext.hadoopConfiguration)
           // FileUtils skips non-CSV files and returns all file paths, separated by ','
           val filePaths = carbonLoadModel.getFactFilePath
-          hadoopConfiguration.set("mapreduce.input.fileinputformat.inputdir", filePaths)
-          hadoopConfiguration.set("mapreduce.input.fileinputformat.input.dir.recursive", "true")
+          hadoopConfiguration.set(FileInputFormat.INPUT_DIR, filePaths)
+          hadoopConfiguration.set(FileInputFormat.INPUT_DIR_RECURSIVE, "true")
 
           configSplitMaxSize(sqlContext.sparkContext, filePaths, hadoopConfiguration)
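
The hunk above floors the computed split size at 16 MB before publishing it under
FileInputFormat.SPLIT_MAXSIZE. A rough standalone sketch of that clamping step (the
division by defaultParallelism is an assumption about the surrounding code, and
SIXTEEN_MB stands in for CarbonCommonConstants.CARBON_16MB):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

    object SplitSizeSketch {
      val SIXTEEN_MB: Long = 16L * 1024 * 1024 // stand-in for CarbonCommonConstants.CARBON_16MB

      def configSplitMaxSize(spaceConsumed: Long, defaultParallelism: Int,
          conf: Configuration): Unit = {
        // Assumed heuristic: one split per available task slot, never below the 16 MB floor.
        var newSplitSize = spaceConsumed / defaultParallelism
        if (newSplitSize < SIXTEEN_MB) {
          newSplitSize = SIXTEEN_MB
        }
        conf.set(FileInputFormat.SPLIT_MAXSIZE, newSplitSize.toString)
      }
    }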
 

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/b31152db/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala b/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala
index c3cc6d9..22713da 100644
--- a/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala
@@ -20,7 +20,7 @@ package org.apache.spark.util
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.io.{LongWritable, Text}
-import org.apache.hadoop.mapreduce.lib.input.FileSplit
+import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
 import org.apache.spark.rdd.{NewHadoopPartition, NewHadoopRDD}
 import org.apache.spark.SparkContext
 
@@ -44,8 +44,8 @@ object SplitUtils {
       // clone the hadoop configuration
       val hadoopConfiguration = new Configuration(sc.hadoopConfiguration)
       // set folder or file
-      hadoopConfiguration.set("mapreduce.input.fileinputformat.inputdir", filePath)
-      hadoopConfiguration.set("mapreduce.input.fileinputformat.input.dir.recursive", "true")
+      hadoopConfiguration.set(FileInputFormat.INPUT_DIR, filePath)
+      hadoopConfiguration.set(FileInputFormat.INPUT_DIR_RECURSIVE, "true")
       val newHadoopRDD = new NewHadoopRDD[LongWritable, Text](
         sc,
         classOf[org.apache.hadoop.mapreduce.lib.input.TextInputFormat],

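SplitUtils applies the same two constants to a cloned Configuration before building a
NewHadoopRDD over the new-API TextInputFormat. A self-contained approximation that uses
the public SparkContext.newAPIHadoopRDD entry point instead of the package-internal
NewHadoopRDD (the input path and app name are placeholders):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, TextInputFormat}
    import org.apache.spark.{SparkConf, SparkContext}

    object SplitUtilsSketch {
      def main(args: Array[String]): Unit = {
        val sc = new SparkContext(
          new SparkConf().setAppName("split-sketch").setMaster("local[2]"))
        // Clone the Hadoop configuration so the input-path settings stay local to this job.
        val hadoopConfiguration = new Configuration(sc.hadoopConfiguration)
        hadoopConfiguration.set(FileInputFormat.INPUT_DIR, "/tmp/input") // placeholder folder or file
        hadoopConfiguration.set(FileInputFormat.INPUT_DIR_RECURSIVE, "true")
        // newAPIHadoopRDD honours the same keys NewHadoopRDD reads, so the partition
        // count reflects the configured input dirs and any split.maxsize setting.
        val rdd = sc.newAPIHadoopRDD(hadoopConfiguration,
          classOf[TextInputFormat], classOf[LongWritable], classOf[Text])
        println(s"partitions: ${rdd.partitions.length}")
        sc.stop()
      }
    }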
