avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r1480766 - in /avro/trunk: ./ lang/java/tools/src/main/java/org/apache/avro/tool/ lang/java/tools/src/test/java/org/apache/avro/tool/
Date Thu, 09 May 2013 20:55:06 GMT
Author: cutting
Date: Thu May  9 20:55:06 2013
New Revision: 1480766

URL: http://svn.apache.org/r1480766
Log:
AVRO-1319. Java: Add command line tools to generate random data files and to convert Avro
to Trevni.

Added:
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java   (with props)
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTrevniTool.java   (with props)
    avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java   (with props)
    avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java   (with props)
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1480766&r1=1480765&r2=1480766&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu May  9 20:55:06 2013
@@ -9,6 +9,9 @@ Trunk (not yet released)
 
     AVRO-1274. Java: Add a schema builder API. (tomwhite)
 
+    AVRO-1319. Java: Add command line tools to generate random data
+    files and to convert Avro to Trevni.  (cutting)
+
   IMPROVEMENTS
 
     AVRO-1260. Ruby: Improve read performance. (Martin Kleppmann via cutting)

Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java?rev=1480766&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java Thu May  9 20:55:06 2013
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.trevni.avro.RandomData;
+
+/** Creates a file filled with randomly-generated instances of a schema. */
+public class CreateRandomFileTool implements Tool {
+
+  @Override
+  public String getName() {
+    return "random";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Creates a file with randomly generated instances of a schema.";
+  }
+
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+
+    OptionParser p = new OptionParser();
+    OptionSpec<Integer> count =
+      p.accepts("count", "Record Count")
+      .withRequiredArg()
+      .ofType(Integer.class);
+    OptionSpec<String> codec =
+      p.accepts("codec", "Compression codec")
+      .withRequiredArg()
+      .defaultsTo("null")
+      .ofType(String.class);
+    OptionSpec<String> file =
+        p.accepts("schema-file", "Schema File")
+        .withOptionalArg()
+        .ofType(String.class);
+    OptionSpec<String> inschema =
+        p.accepts("schema", "Schema")
+        .withOptionalArg()
+        .ofType(String.class);
+    OptionSet opts = p.parse(args.toArray(new String[0]));
+    if (opts.nonOptionArguments().size() != 1) {
+      err.println("Usage: outFile (filename or '-' for stdout)");
+      p.printHelpOn(err);
+      return 1;
+    }
+    args = opts.nonOptionArguments();
+
+    String schemastr = inschema.value(opts);
+    String schemafile = file.value(opts);
+    if (schemastr == null && schemafile == null) {
+        err.println("Need input schema (--schema-file) or (--schema)");
+        p.printHelpOn(err);
+        return 1;
+    }
+    Schema schema = (schemafile != null)
+        ? new Schema.Parser().parse(Util.openFromFS(schemafile))
+        : new Schema.Parser().parse(schemastr);
+
+    DataFileWriter<Object> writer =
+      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
+    writer.setCodec(CodecFactory.fromString(codec.value(opts)));
+    writer.create(schema, Util.fileOrStdout(args.get(0), out));
+
+    for (Object datum : new RandomData(schema, (int)count.value(opts)))
+      writer.append(datum);
+
+    writer.close();
+
+    return 0;
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/CreateRandomFileTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java?rev=1480766&r1=1480765&r2=1480766&view=diff
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java (original)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/Main.java Thu May  9 20:55:06 2013
@@ -40,6 +40,7 @@ public class Main {
         new InduceSchemaTool(),
         new JsonToBinaryFragmentTool(),
         new BinaryFragmentToJsonTool(),
+        new CreateRandomFileTool(),
         new DataFileReadTool(),
         new DataFileWriteTool(),
         new DataFileGetMetaTool(),
@@ -52,6 +53,7 @@ public class Main {
         new RpcProtocolTool(),
         new FromTextTool(),
         new ToTextTool(),
+        new ToTrevniTool(),
         new TetherTool(),
         new TrevniCreateRandomTool(),
         new TrevniMetadataTool(),

Added: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTrevniTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTrevniTool.java?rev=1480766&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTrevniTool.java (added)
+++ avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTrevniTool.java Thu May  9 20:55:06 2013
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.util.List;
+
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+
+import org.apache.trevni.ColumnFileMetaData;
+import org.apache.trevni.avro.AvroColumnWriter;
+
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+
+
+/**
+ * Reads an Avro data file and writes a Trevni file.
+ *
+ * <p>The Trevni compression codec is chosen with <code>--codec</code>
+ * (default <code>"null"</code>).
+ */
+public class ToTrevniTool implements Tool {
+
+  @Override
+  public String getName() {
+    return "totrevni";
+  }
+
+  @Override
+  public String getShortDescription() {
+    return "Converts an Avro data file to a Trevni file.";
+  }
+
+  /**
+   * Copies every record of the input Avro data file into a Trevni column
+   * file that uses the input file's schema.
+   *
+   * @param stdin handed to <code>Util.fileOrStdin</code> together with the
+   *          input name; per the usage text, '-' selects stdin
+   * @param out handed to <code>Util.fileOrStdout</code> together with the
+   *          output name; per the usage text, '-' selects stdout
+   * @param err receives the usage message
+   * @param args options followed by the input and output file names
+   * @return 0 on success, 1 when the argument count is wrong
+   * @throws Exception if reading the input or writing the output fails
+   */
+  @Override
+  public int run(InputStream stdin, PrintStream out, PrintStream err,
+      List<String> args) throws Exception {
+
+    OptionParser p = new OptionParser();
+    OptionSpec<String> codec =
+      p.accepts("codec", "Compression codec")
+      .withRequiredArg()
+      .defaultsTo("null")
+      .ofType(String.class);
+    OptionSet opts = p.parse(args.toArray(new String[0]));
+    if (opts.nonOptionArguments().size() != 2) {
+      err.println("Usage: inFile outFile (filenames or '-' for stdin/stdout)");
+      p.printHelpOn(err);
+      return 1;
+    }
+    args = opts.nonOptionArguments();
+
+    DataFileStream<Object> reader =
+      new DataFileStream<Object>(Util.fileOrStdin(args.get(0), stdin),
+                                 new GenericDatumReader<Object>());
+    try {
+      OutputStream outs = Util.fileOrStdout(args.get(1), out);
+      try {
+        AvroColumnWriter<Object> writer =
+          new AvroColumnWriter<Object>(reader.getSchema(),
+                                       new ColumnFileMetaData()
+                                       .setCodec(codec.value(opts)));
+        for (Object datum : reader) {
+          writer.write(datum);
+        }
+        // Records are handed to the column writer first; the file is only
+        // emitted by this call.
+        writer.writeTo(outs);
+      } finally {
+        outs.close();
+      }
+    } finally {
+      // Release the input even when the conversion fails part-way.
+      reader.close();
+    }
+    return 0;
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/main/java/org/apache/avro/tool/ToTrevniTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java?rev=1480766&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java (added)
+++ avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java Thu May  9 20:55:06 2013
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Iterator;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.trevni.avro.RandomData;
+
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+public class TestCreateRandomFileTool {
+  private static final String COUNT = System.getProperty("test.count", "200");
+  private static final File DIR
+    = new File(System.getProperty("test.dir", "/tmp"));
+  private static final File OUT_FILE = new File(DIR, "random.avro");
+  private static final File SCHEMA_FILE =
+    new File("../../../share/test/schemas/weather.avsc");
+
+  private String run(List<String> args) throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream p = new PrintStream(baos);
+    new CreateRandomFileTool().run(null, p, null, args);
+    return baos.toString("UTF-8").replace("\r", "");
+  }
+  
+  public void check(String... extraArgs) throws Exception {
+    ArrayList<String> args = new ArrayList<String>();
+    args.addAll(Arrays.asList(new String[] {
+        OUT_FILE.toString(),
+        "--count", COUNT,
+        "--schema-file", SCHEMA_FILE.toString()
+        }));
+    args.addAll(Arrays.asList(extraArgs));
+    run(args);
+
+    DataFileReader<Object> reader =
+      new DataFileReader(OUT_FILE, new GenericDatumReader<Object>());
+    
+    Iterator<Object> found = reader.iterator();
+    for (Object expected :
+           new RandomData(Schema.parse(SCHEMA_FILE), Integer.parseInt(COUNT)))
+      assertEquals(expected, found.next());
+
+    reader.close();
+  }
+
+  @Test
+  public void testSimple() throws Exception {
+    check();
+  }
+
+  @Test
+  public void testCodec() throws Exception {
+    check("--codec", "snappy");
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestCreateRandomFileTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java?rev=1480766&view=auto
==============================================================================
--- avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java (added)
+++ avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java Thu May  9 20:55:06 2013
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.tool;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Iterator;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.trevni.avro.AvroColumnReader;
+import org.apache.trevni.avro.RandomData;
+
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+public class TestToTrevniTool {
+  private static final int COUNT =
+    Integer.parseInt(System.getProperty("test.count", "200"));
+  private static final File DIR
+    = new File(System.getProperty("test.dir", "/tmp"));
+  private static final File AVRO_FILE = new File(DIR, "random.avro");
+  private static final File TREVNI_FILE = new File(DIR, "random.trv");
+  private static final File SCHEMA_FILE =
+    new File("../../../share/test/schemas/weather.avsc");
+
+  private String run(String... args) throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream p = new PrintStream(baos);
+    new ToTrevniTool().run(null, p, null, Arrays.asList(args));
+    return baos.toString("UTF-8").replace("\r", "");
+  }
+  
+  @Test
+  public void test() throws Exception {
+    Schema schema = Schema.parse(SCHEMA_FILE);
+
+    DataFileWriter<Object> writer =
+      new DataFileWriter<Object>(new GenericDatumWriter<Object>());
+    writer.create(schema, Util.createFromFS(AVRO_FILE.toString()));
+    for (Object datum : new RandomData(schema, COUNT))
+      writer.append(datum);
+    writer.close();
+
+    run(AVRO_FILE.toString(), TREVNI_FILE.toString());
+
+    AvroColumnReader<Object> reader =
+      new AvroColumnReader<Object>(new AvroColumnReader.Params(TREVNI_FILE));
+    Iterator<Object> found = reader.iterator();
+    for (Object expected : new RandomData(schema, COUNT))
+      assertEquals(expected, found.next());
+    reader.close();
+  }
+
+}

Propchange: avro/trunk/lang/java/tools/src/test/java/org/apache/avro/tool/TestToTrevniTool.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message