avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dcrea...@apache.org
Subject svn commit: r1236284 - in /avro/trunk: ./ lang/c/src/ lang/java/avro/src/main/java/org/apache/avro/file/ lang/java/avro/src/test/java/org/apache/avro/ share/test/data/
Date Thu, 26 Jan 2012 16:47:31 GMT
Author: dcreager
Date: Thu Jan 26 16:47:31 2012
New Revision: 1236284

URL: http://svn.apache.org/viewvc?rev=1236284&view=rev
Log:
AVRO-986. C/Java: Fixes related to sync markers in file headers.

The C bindings no longer add an "avro.sync" entry to the header of an
Avro file.  (Previously, the Java bindings (at least) would barf if this
sync marker was present, since they'd treat that as the end of the file
header.)  The Java bindings now ignore the extra sync marker, if
present.

In addition, this patch adds a new "avromod" command-line utility to the
C bindings.  This utility can be used (among other things) to remove the
sync marker from the header of an existing file.  It works by copying
all of the records from a source file to a destination file.  You can
also override the block size and compression codec of the new
destination file.

Added:
    avro/trunk/lang/c/src/avromod.c
    avro/trunk/share/test/data/syncInMeta.avro   (with props)
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/c/src/CMakeLists.txt
    avro/trunk/lang/c/src/avrocat.c
    avro/trunk/lang/c/src/avropipe.c
    avro/trunk/lang/c/src/datafile.c
    avro/trunk/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
    avro/trunk/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1236284&r1=1236283&r2=1236284&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu Jan 26 16:47:31 2012
@@ -7,6 +7,9 @@ Avro 1.6.2 (unreleased)
     AVRO-854.  Python: Permit DataFileWriter and DataFileReader to be
     used as context managers in "with" statements. (Harsh J via cutting)
 
+    AVRO-986. C: avromod utility for modifying structural properties of
+    an Avro data file. (dcreager)
+
   OPTIMIZATIONS
 
   IMPROVEMENTS
@@ -83,6 +86,12 @@ Avro 1.6.2 (unreleased)
     AVRO-1004. C: avropipe no longer displays NUL terminator for string
     values. (dcreager)
 
+    AVRO-986. C: File headers no longer contain sync marker. (Michael
+    Cooper via dcreager)
+
+    AVRO-986. Java: DataFileReader correctly handles sync marker
+    appearing within file header. (cutting via dcreager)
+
 Avro 1.6.1 (8 November 2011)
 
   INCOMPATIBLE CHANGES

Modified: avro/trunk/lang/c/src/CMakeLists.txt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/CMakeLists.txt?rev=1236284&r1=1236283&r2=1236284&view=diff
==============================================================================
--- avro/trunk/lang/c/src/CMakeLists.txt (original)
+++ avro/trunk/lang/c/src/CMakeLists.txt Thu Jan 26 16:47:31 2012
@@ -132,10 +132,13 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR
         DESTINATION lib/pkgconfig)
 
 add_executable(avrocat avrocat.c)
-
 target_link_libraries(avrocat avro-static)
 install(TARGETS avrocat RUNTIME DESTINATION bin)
 
 add_executable(avropipe avropipe.c)
 target_link_libraries(avropipe avro-static)
 install(TARGETS avropipe RUNTIME DESTINATION bin)
+
+add_executable(avromod avromod.c)
+target_link_libraries(avromod avro-static)
+install(TARGETS avromod RUNTIME DESTINATION bin)

Modified: avro/trunk/lang/c/src/avrocat.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/avrocat.c?rev=1236284&r1=1236283&r2=1236284&view=diff
==============================================================================
--- avro/trunk/lang/c/src/avrocat.c (original)
+++ avro/trunk/lang/c/src/avrocat.c Thu Jan 26 16:47:31 2012
@@ -34,13 +34,13 @@ process_file(const char *filename)
 	if (filename == NULL) {
 		if (avro_file_reader_fp(stdin, "<stdin>", 0, &reader)) {
 			fprintf(stderr, "Error opening <stdin>:\n  %s\n",
-				strerror(errno));
+				avro_strerror());
 			exit(1);
 		}
 	} else {
 		if (avro_file_reader(filename, &reader)) {
 			fprintf(stderr, "Error opening %s:\n  %s\n",
-				filename, strerror(errno));
+				filename, avro_strerror());
 			exit(1);
 		}
 	}

Added: avro/trunk/lang/c/src/avromod.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/avromod.c?rev=1236284&view=auto
==============================================================================
--- avro/trunk/lang/c/src/avromod.c (added)
+++ avro/trunk/lang/c/src/avromod.c Thu Jan 26 16:47:31 2012
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avro.h"
+#include "avro_private.h"
+
+
+/* The compression codec to use. */
+static const char  *codec = "null";
+
+/* The block size to use. */
+static size_t  block_size = 0;
+
+/*-- PROCESSING A FILE --*/
+
+static void
+process_file(const char *in_filename, const char *out_filename)
+{
+	avro_file_reader_t  reader;
+	avro_file_writer_t  writer;
+
+	if (in_filename == NULL) {
+		if (avro_file_reader_fp(stdin, "<stdin>", 0, &reader)) {
+			fprintf(stderr, "Error opening <stdin>:\n  %s\n",
+				avro_strerror());
+			exit(1);
+		}
+	} else {
+		if (avro_file_reader(in_filename, &reader)) {
+			fprintf(stderr, "Error opening %s:\n  %s\n",
+				in_filename, avro_strerror());
+			exit(1);
+		}
+	}
+
+	avro_schema_t  wschema;
+	avro_value_iface_t  *iface;
+	avro_value_t  value;
+
+	wschema = avro_file_reader_get_writer_schema(reader);
+	iface = avro_generic_class_from_schema(wschema);
+	avro_generic_value_new(iface, &value);
+
+	if (avro_file_writer_create_with_codec
+	    (out_filename, wschema, &writer, codec, block_size)) {
+		fprintf(stderr, "Error creating %s:\n  %s\n",
+			out_filename, avro_strerror());
+		exit(1);
+	}
+
+	while (avro_file_reader_read_value(reader, &value) == 0) {
+		if (avro_file_writer_append_value(writer, &value)) {
+			fprintf(stderr, "Error writing to %s:\n  %s\n",
+				out_filename, avro_strerror());
+			exit(1);
+		}
+		avro_value_reset(&value);
+	}
+
+	avro_file_reader_close(reader);
+	avro_file_writer_close(writer);
+	avro_value_decref(&value);
+	avro_value_iface_decref(iface);
+}
+
+
+/*-- MAIN PROGRAM --*/
+
+static struct option longopts[] = {
+	{ "block-size", required_argument, NULL, 'b' },
+	{ "codec", required_argument, NULL, 'c' },
+	{ NULL, 0, NULL, 0 }
+};
+
+static void usage(void)
+{
+	fprintf(stderr,
+		"Usage: avromod [--codec=<compression codec>]\n"
+		"               [--block-size=<block size>]\n"
+		"               [<input avro file>]\n"
+		"                <output avro file>\n");
+}
+
+static void
+parse_block_size(const char *optarg)
+{
+	unsigned long  ul;
+	char  *end;
+
+	ul = strtoul(optarg, &end, 10);
+	if ((ul == 0 && end == optarg) ||
+	    (ul == ULONG_MAX && errno == ERANGE)) {
+		fprintf(stderr, "Invalid block size: %s\n\n", optarg);
+		usage();
+		exit(1);
+	}
+	block_size = ul;
+}
+
+
+int main(int argc, char **argv)
+{
+	char  *in_filename;
+	char  *out_filename;
+
+	int  ch;
+	while ((ch = getopt_long(argc, argv, "b:c:", longopts, NULL)) != -1) {
+		switch (ch) {
+			case 'b':
+				parse_block_size(optarg);
+				break;
+
+			case 'c':
+				codec = optarg;
+				break;
+
+			default:
+				usage();
+				exit(1);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc == 2) {
+		in_filename = argv[0];
+		out_filename = argv[1];
+	} else if (argc == 1) {
+		in_filename = NULL;
+		out_filename = argv[0];
+	} else {
+		fprintf(stderr, "Can't read from multiple input files.\n");
+		usage();
+		exit(1);
+	}
+
+	/* Process the data file */
+	process_file(in_filename, out_filename);
+	return 0;
+}

Modified: avro/trunk/lang/c/src/avropipe.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/avropipe.c?rev=1236284&r1=1236283&r2=1236284&view=diff
==============================================================================
--- avro/trunk/lang/c/src/avropipe.c (original)
+++ avro/trunk/lang/c/src/avropipe.c Thu Jan 26 16:47:31 2012
@@ -339,13 +339,13 @@ process_file(const char *filename)
 	if (filename == NULL) {
 		if (avro_file_reader_fp(stdin, "<stdin>", 0, &reader)) {
 			fprintf(stderr, "Error opening <stdin>:\n  %s\n",
-				strerror(errno));
+				avro_strerror());
 			exit(1);
 		}
 	} else {
 		if (avro_file_reader(filename, &reader)) {
 			fprintf(stderr, "Error opening %s:\n  %s\n",
-				filename, strerror(errno));
+				filename, avro_strerror());
 			exit(1);
 		}
 	}

Modified: avro/trunk/lang/c/src/datafile.c
URL: http://svn.apache.org/viewvc/avro/trunk/lang/c/src/datafile.c?rev=1236284&r1=1236283&r2=1236284&view=diff
==============================================================================
--- avro/trunk/lang/c/src/datafile.c (original)
+++ avro/trunk/lang/c/src/datafile.c Thu Jan 26 16:47:31 2012
@@ -86,9 +86,7 @@ static int write_header(avro_file_writer
 	check(rval, avro_write(w->writer, "Obj", 3));
 	check(rval, avro_write(w->writer, &version, 1));
 
-	check(rval, enc->write_long(w->writer, 3));
-	check(rval, enc->write_string(w->writer, "avro.sync"));
-	check(rval, enc->write_bytes(w->writer, w->sync, sizeof(w->sync)));
+	check(rval, enc->write_long(w->writer, 2));
 	check(rval, enc->write_string(w->writer, "avro.codec"));
 	check(rval, enc->write_bytes(w->writer, w->codec->name, strlen(w->codec->name)));
 	check(rval, enc->write_string(w->writer, "avro.schema"));

Modified: avro/trunk/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java?rev=1236284&r1=1236283&r2=1236284&view=diff
==============================================================================
--- avro/trunk/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java (original)
+++ avro/trunk/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java Thu Jan 26 16:47:31 2012
@@ -125,6 +125,11 @@ public class DataFileReader<D>
   @Override
   public void sync(long position) throws IOException {
     seek(position);
+    // work around an issue where 1.5.4 C stored sync in metadata
+    if ((position == 0) && (getMeta("avro.sync") != null)) {
+      initialize(sin);                            // re-init to skip header
+      return;
+    }
     try {
       int i=0, b;
       InputStream in = vin.inputStream();

Modified: avro/trunk/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java?rev=1236284&r1=1236283&r2=1236284&view=diff
==============================================================================
--- avro/trunk/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java (original)
+++ avro/trunk/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java Thu Jan 26 16:47:31 2012
@@ -257,6 +257,15 @@ public class TestDataFile {
     assertNotNull("Should be able to reopen at sync point", reader.next());
   }
 
+  @Test public void testSyncInHeader() throws IOException {
+    DataFileReader<Object> reader = new DataFileReader<Object>
+      (new File("../../../share/test/data/syncInMeta.avro"),
+       new GenericDatumReader<Object>());
+    reader.sync(0);
+    for (Object datum : reader)
+      assertNotNull(datum);
+  }
+
   @Test public void test12() throws IOException {
     readFile(new File("../../../share/test/data/test.avro12"),
              new GenericDatumReader<Object>());

Added: avro/trunk/share/test/data/syncInMeta.avro
URL: http://svn.apache.org/viewvc/avro/trunk/share/test/data/syncInMeta.avro?rev=1236284&view=auto
==============================================================================
Binary file - no diff available.

Propchange: avro/trunk/share/test/data/syncInMeta.avro
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



Mime
View raw message