spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject git commit: [SPARK-2953] Allow using short names for io compression codecs
Date Wed, 13 Aug 2014 05:50:45 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-1.1 cffd9bb8d -> 837bf60fd


[SPARK-2953] Allow using short names for io compression codecs

Instead of requiring "org.apache.spark.io.LZ4CompressionCodec", it is easier for users if
Spark just accepts "lz4", "lzf", "snappy".

Author: Reynold Xin <rxin@apache.org>

Closes #1873 from rxin/compressionCodecShortForm and squashes the following commits:

9f50962 [Reynold Xin] Specify short-form compression codec names first.
63f78ee [Reynold Xin] Updated configuration documentation.
47b3848 [Reynold Xin] [SPARK-2953] Allow using short names for io compression codecs

(cherry picked from commit 676f98289dad61c091bb45bd35a2b9613b22d64a)
Signed-off-by: Reynold Xin <rxin@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/837bf60f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/837bf60f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/837bf60f

Branch: refs/heads/branch-1.1
Commit: 837bf60fd0e4597a50c917ad637d7fee4ff47a9a
Parents: cffd9bb
Author: Reynold Xin <rxin@apache.org>
Authored: Tue Aug 12 22:50:29 2014 -0700
Committer: Reynold Xin <rxin@apache.org>
Committed: Tue Aug 12 22:50:42 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/io/CompressionCodec.scala    | 11 +++++++++--
 .../apache/spark/io/CompressionCodecSuite.scala   | 18 ++++++++++++++++++
 docs/configuration.md                             |  8 +++++---
 3 files changed, 32 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/837bf60f/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
index 1b66218..ef9c43e 100644
--- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
+++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
@@ -46,17 +46,24 @@ trait CompressionCodec {
 
 
 private[spark] object CompressionCodec {
+
+  private val shortCompressionCodecNames = Map(
+    "lz4" -> classOf[LZ4CompressionCodec].getName,
+    "lzf" -> classOf[LZFCompressionCodec].getName,
+    "snappy" -> classOf[SnappyCompressionCodec].getName)
+
   def createCodec(conf: SparkConf): CompressionCodec = {
     createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC))
   }
 
   def createCodec(conf: SparkConf, codecName: String): CompressionCodec = {
-    val ctor = Class.forName(codecName, true, Utils.getContextOrSparkClassLoader)
+    val codecClass = shortCompressionCodecNames.getOrElse(codecName.toLowerCase, codecName)
+    val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader)
       .getConstructor(classOf[SparkConf])
     ctor.newInstance(conf).asInstanceOf[CompressionCodec]
   }
 
-  val DEFAULT_COMPRESSION_CODEC = classOf[SnappyCompressionCodec].getName
+  val DEFAULT_COMPRESSION_CODEC = "snappy"
 }
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/837bf60f/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
index 3f882a7..25be7f2 100644
--- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
+++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
@@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite {
     testCodec(codec)
   }
 
+  test("lz4 compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "lz4")
+    assert(codec.getClass === classOf[LZ4CompressionCodec])
+    testCodec(codec)
+  }
+
   test("lzf compression codec") {
     val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
     assert(codec.getClass === classOf[LZFCompressionCodec])
     testCodec(codec)
   }
 
+  test("lzf compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "lzf")
+    assert(codec.getClass === classOf[LZFCompressionCodec])
+    testCodec(codec)
+  }
+
   test("snappy compression codec") {
     val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
     assert(codec.getClass === classOf[SnappyCompressionCodec])
     testCodec(codec)
   }
+
+  test("snappy compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "snappy")
+    assert(codec.getClass === classOf[SnappyCompressionCodec])
+    testCodec(codec)
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/837bf60f/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 617a72a..8136bd6 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -373,10 +373,12 @@ Apart from these, the following properties are also available, and may
be useful
 </tr>
 <tr>
   <td><code>spark.io.compression.codec</code></td>
-  <td>org.apache.spark.io.<br />SnappyCompressionCodec</td>
+  <td>snappy</td>
   <td>
-    The codec used to compress internal data such as RDD partitions and shuffle outputs.
-    By default, Spark provides three codecs:  <code>org.apache.spark.io.LZ4CompressionCodec</code>,
+    The codec used to compress internal data such as RDD partitions and shuffle outputs.
By default,
+    Spark provides three codecs: <code>lz4</code>, <code>lzf</code>,
and <code>snappy</code>. You
+    can also use fully qualified class names to specify the codec, e.g.
+    <code>org.apache.spark.io.LZ4CompressionCodec</code>,
     <code>org.apache.spark.io.LZFCompressionCodec</code>,
     and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
   </td>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message