avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r1037892 - in /avro/trunk: ./ lang/java/src/java/org/apache/avro/file/ lang/java/src/java/org/apache/avro/tool/ lang/java/src/test/java/org/apache/avro/tool/
Date Mon, 22 Nov 2010 21:16:48 GMT
Author: cutting
Date: Mon Nov 22 21:16:45 2010
New Revision: 1037892

URL: http://svn.apache.org/viewvc?rev=1037892&view=rev
Log:
AVRO-684.  Java: Add command-line recodec tool to change file compression codecs.  Contributed by Patrick Linehan.

Added:
    avro/trunk/lang/java/src/java/org/apache/avro/tool/RecodecTool.java
    avro/trunk/lang/java/src/test/java/org/apache/avro/tool/TestRecodecTool.java
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/src/java/org/apache/avro/file/DataFileWriter.java
    avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1037892&r1=1037891&r2=1037892&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Mon Nov 22 21:16:45 2010
@@ -17,6 +17,11 @@ Avro 1.5.0 (unreleased)
     AVRO-696. Java: Make DataFileWriter.setMetaInternal(String,String)
     private. (Patrick Linehan via cutting)    
 
+  NEW FEATURES
+
+    AVRO-684. Java: Add command-line "recodec" tool to change file
+    compression codecs.  (Patrick Linehan via cutting)
+
   IMPROVEMENTS
 
     AVRO-682. Java: Add method DataFileStream.getMetaKeys().

Modified: avro/trunk/lang/java/src/java/org/apache/avro/file/DataFileWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/file/DataFileWriter.java?rev=1037892&r1=1037891&r2=1037892&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/file/DataFileWriter.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/file/DataFileWriter.java Mon Nov 22 21:16:45 2010
@@ -216,13 +216,13 @@ public class DataFileWriter<D> implement
 
   /** Set a metadata property. */
   public DataFileWriter<D> setMeta(String key, byte[] value) {
-    if (isReserved(key)) {
+    if (isReservedMeta(key)) {
       throw new AvroRuntimeException("Cannot set reserved meta key: " + key);
     }
     return setMetaInternal(key, value);
   }
   
-  private boolean isReserved(String key) {
+  public static boolean isReservedMeta(String key) {
     return key.startsWith("avro.");
   }
 

Modified: avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java?rev=1037892&r1=1037891&r2=1037892&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/Main.java Mon Nov 22 21:16:45 2010
@@ -47,6 +47,7 @@ public class Main {
         new DataFileWriteTool(),
         new DataFileGetSchemaTool(),
         new IdlTool(),
+        new RecodecTool(),
         new RpcReceiveTool(),
         new RpcSendTool(),
         new FromTextTool(),

Added: avro/trunk/lang/java/src/java/org/apache/avro/tool/RecodecTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/tool/RecodecTool.java?rev=1037892&view=auto
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/tool/RecodecTool.java (added)
+++ avro/trunk/lang/java/src/java/org/apache/avro/tool/RecodecTool.java Mon Nov 22 21:16:45 2010
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.List;
+import java.util.zip.Deflater;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+
+/** Tool to alter the codec of an Avro data file. */
+public class RecodecTool implements Tool {
+  @Override
+  public int run(InputStream in, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+
+    OptionParser optParser = new OptionParser();
+    OptionSpec<String> codecOpt = optParser
+      .accepts("codec", "Compression codec")
+      .withRequiredArg()
+      .defaultsTo("null")
+      .ofType(String.class);
+    OptionSpec<String> levelOpt = optParser
+      .accepts("level", "Compression level (only applies to deflate)")
+      .withRequiredArg()
+      .defaultsTo("" + Deflater.DEFAULT_COMPRESSION)
+      .ofType(String.class);
+    OptionSet opts = optParser.parse(args.toArray(new String[0]));
+
+    List<String> nargs = opts.nonOptionArguments();
+    if (nargs.size() > 2) {
+      err.println("Expected at most an input file and output file.");
+      optParser.printHelpOn(err);
+      return 1;
+    }
+    InputStream input = in;
+    boolean inputNeedsClosing = false;
+    if (nargs.size() > 0 && !nargs.get(0).equals("-")) {
+      input = new FileInputStream(nargs.get(0));
+      inputNeedsClosing = true;
+    }
+    OutputStream output = out;
+    boolean outputNeedsClosing = false;
+    if (nargs.size() > 1 && !nargs.get(1).equals("-")) {
+      output = new FileOutputStream(nargs.get(1));
+      outputNeedsClosing = true;
+    }
+
+    DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
+        input, new GenericDatumReader<GenericRecord>());
+    Schema schema = reader.getSchema();
+    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
+        new GenericDatumWriter<GenericRecord>());
+    CodecFactory codec = opts.valueOf(codecOpt).equals("deflate")
+        ? CodecFactory.deflateCodec(Integer.parseInt(levelOpt.value(opts)))
+        : CodecFactory.fromString(codecOpt.value(opts));
+    writer.setCodec(codec);
+    for (String key : reader.getMetaKeys()) {
+      if (!DataFileWriter.isReservedMeta(key)) {
+        writer.setMeta(key, reader.getMeta(key));
+      }
+    }
+    writer.create(schema, output);
+
+    writer.appendAllFrom(reader, true);
+    writer.flush();
+
+    if (inputNeedsClosing) {
+      input.close();
+    }
+    if (outputNeedsClosing) {
+      output.close();
+    }
+    return 0;
+  }
+
+  @Override
+  public String getName() {
+    return "recodec";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Alters the codec of a data file.";
+  }
+}

Added: avro/trunk/lang/java/src/test/java/org/apache/avro/tool/TestRecodecTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/test/java/org/apache/avro/tool/TestRecodecTool.java?rev=1037892&view=auto
==============================================================================
--- avro/trunk/lang/java/src/test/java/org/apache/avro/tool/TestRecodecTool.java (added)
+++ avro/trunk/lang/java/src/test/java/org/apache/avro/tool/TestRecodecTool.java Mon Nov 22 21:16:45 2010
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import static java.util.Arrays.asList;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.PrintStream;
+import java.util.ArrayList;
+
+import org.apache.avro.AvroTestUtil;
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestRecodecTool {
+  @Test
+  public void testRecodec() throws Exception {
+    String metaKey = "myMetaKey";
+    String metaValue = "myMetaValue";
+    
+    File inputFile = AvroTestUtil.tempFile("input.avro");
+    
+    Schema schema = Schema.create(Type.STRING);
+    DataFileWriter<String> writer = new DataFileWriter<String>(
+        new GenericDatumWriter<String>(schema))
+        .setMeta(metaKey, metaValue)
+        .create(schema, inputFile);
+    // We write some garbage which should be quite compressible by deflate,
+    // but is complicated enough that deflate-9 will work better than deflate-1.
+    // These values were plucked from thin air and worked on the first try, so
+    // don't read too much into them.
+    for (int i = 0; i < 100000; i++) {
+      writer.append("" + i % 100);
+    }
+    writer.close();
+
+    File defaultOutputFile = AvroTestUtil.tempFile("default-output.avro");
+    File nullOutputFile = AvroTestUtil.tempFile("null-output.avro");
+    File deflateDefaultOutputFile = AvroTestUtil.tempFile("deflate-default-output.avro");
+    File deflate1OutputFile = AvroTestUtil.tempFile("deflate-1-output.avro");
+    File deflate9OutputFile = AvroTestUtil.tempFile("deflate-9-output.avro");
+    
+    new RecodecTool().run(new FileInputStream(inputFile), new PrintStream(defaultOutputFile), null, new ArrayList<String>());
+    new RecodecTool().run(new FileInputStream(inputFile), new PrintStream(nullOutputFile), null, asList("--codec=null"));
+    new RecodecTool().run(new FileInputStream(inputFile), new PrintStream(deflateDefaultOutputFile), null, asList("--codec=deflate"));
+    new RecodecTool().run(new FileInputStream(inputFile), new PrintStream(deflate1OutputFile), null, asList("--codec=deflate", "--level=1"));
+    new RecodecTool().run(new FileInputStream(inputFile), new PrintStream(deflate9OutputFile), null, asList("--codec=deflate", "--level=9"));
+    
+    // We assume that metadata copying is orthogonal to codec selection, and
+    // so only test it for a single file.
+    Assert.assertEquals(
+      metaValue,
+      new DataFileReader<Void>(defaultOutputFile, new GenericDatumReader<Void>())
+        .getMetaString(metaKey));
+    
+    // The "default" codec should be the same as null.
+    Assert.assertEquals(defaultOutputFile.length(), nullOutputFile.length());
+    
+    // All of the deflated files should be smaller than the null file.
+    assertLessThan(deflateDefaultOutputFile.length(), nullOutputFile.length());
+    assertLessThan(deflate1OutputFile.length(), nullOutputFile.length());
+    assertLessThan(deflate9OutputFile.length(), nullOutputFile.length());
+    
+    // The "level 9" file should be smaller than the "level 1" file.
+    assertLessThan(deflate9OutputFile.length(), deflate1OutputFile.length());
+    
+//    System.err.println(inputFile.length());
+//    System.err.println(defaultOutputFile.length());
+//    System.err.println(nullOutputFile.length());
+//    System.err.println(deflateDefaultOutputFile.length());
+//    System.err.println(deflate1OutputFile.length());
+//    System.err.println(deflate9OutputFile.length());
+    
+    inputFile.delete();
+    defaultOutputFile.delete();
+    nullOutputFile.delete();
+    deflateDefaultOutputFile.delete();
+    deflate1OutputFile.delete();
+    deflate9OutputFile.delete();
+  }
+  
+  private static void assertLessThan(long less, long more) {
+    if (less >= more) {
+      Assert.fail("Expected " + less + " to be less than " + more);
+    }
+  }
+}



Mime
View raw message