avro-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Rob Turner <robairrob...@gmail.com>
Subject Re: Whats wrong with this Avro code?
Date Thu, 19 Mar 2015 19:16:46 GMT
Hi Sachin,

I suspect it is a character encoding issue: the Avro bytes generated are
converted to a String using the default character encoding (which is
probably UTF-8) and then converted back to bytes using the same character
encoding. In general this does not preserve the original bytes, because some
byte sequences are not valid UTF-8 and are replaced during decoding. You can
use the ISO-8859-1 encoding for both new String(bytes, "ISO-8859-1") and
binaryString.getBytes("ISO-8859-1"), or better still, just pass the bytes
variable directly in the call to binaryDecoder.

Regards
   Rob Turner.

On 19 March 2015 at 16:49, S G <sg.online.email@gmail.com> wrote:

> Hi,
>
> Here is a failing test which I think might indicate a bug.
> Can someone point out to me what's wrong with this code?
>
> public class TestBinaryDecoderSeparateSchema {
>
>   @Test
>   public void checkAvroWithoutEmbeddedSchema () throws Exception {
>
>     Person person = new Person();
>
>     ReflectData rdata = ReflectData.AllowNull.get();
>     Schema schema = rdata.getSchema(Person.class);
>
>     // Write avro as binary
>     ByteArrayOutputStream os = new ByteArrayOutputStream();
>     Encoder encoder = EncoderFactory.get().binaryEncoder(os, null);
>     DatumWriter<Person> writer = new ReflectDatumWriter<Person>(schema,
> rdata);
>     writer.write(person, encoder);
>     encoder.flush();
>     byte[] bytes = os.toByteArray();
>     String binaryString = new String (bytes);
>     log (binaryString);
>
>     // Read avro binary string into GenericRecord
>     BinaryDecoder decoder =
> DecoderFactory.get().binaryDecoder(binaryString.getBytes(), null);
>     GenericDatumReader<GenericRecord> datumReader = new
> GenericDatumReader<GenericRecord> ();
>     datumReader.setSchema(schema);
>     GenericRecord record = datumReader.read(null, decoder);
>   }
>
>
>   @Test
>   public void checkAvroWithEmbeddedSchema () throws Exception {
>
>     Person person = new Person();
>
>     ReflectData rdata = ReflectData.AllowNull.get();
>     Schema schema = rdata.getSchema(Person.class);
>
>     // Write avro with embedded schema
>     ReflectDatumWriter<Person> datumWriter = new
> ReflectDatumWriter<Person> (Person.class, rdata);
>     DataFileWriter<Person> fileWriter = new DataFileWriter<Person>
> (datumWriter);
>     ByteArrayOutputStream baos = new ByteArrayOutputStream();
>     fileWriter.create(schema, baos);
>     fileWriter.append(person);
>     fileWriter.close();
>     byte[] bytes = baos.toByteArray();
>     String binaryString = new String (bytes);
>     log (binaryString);
>
>     // Read avro with embedded schema
>     GenericDatumReader<GenericRecord> datumReader = new
> GenericDatumReader<GenericRecord> ();
>     SeekableByteArrayInput avroInputStream = new
> SeekableByteArrayInput(bytes);
>     DataFileReader<GenericRecord> fileReader =
>             new DataFileReader<GenericRecord>(avroInputStream,
> datumReader);
>
>     schema = fileReader.getSchema();
>     GenericRecord record = null;
>     List<GenericRecord> records = new ArrayList<GenericRecord> ();
>     while (fileReader.hasNext())
>         records.add (fileReader.next(record));
>
>     log ("Read " + records.size() + " records");
>   }
>
>   private static class Person {
>     String name = "John";
>     Long age = 30L;
>     Address address = new Address();
>   }
>
>   private static class Address {
>     String city = "San Francisco";
>     Integer zipCode = 90900;
>   }
>
>   private static void log (String s) {
>     System.out.println (s);
>   }
> }
>
>
> *Exception is:*
> java.io.IOException: Invalid int encoding
>     at org.apache.avro.io.BinaryDecoder.readInt(BinaryDecoder.java:145)
>     at
> org.apache.avro.io.ValidatingDecoder.readInt(ValidatingDecoder.java:83)
>     at
> org.apache.avro.generic.GenericDatumReader.readInt(GenericDatumReader.java:444)
>     at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:159)
>     at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:155)
>     at
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:193)
>     at
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:183)
>     at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:151)
>     at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:155)
>     at
> org.apache.avro.generic.GenericDatumReader.readField(GenericDatumReader.java:193)
>     at
> org.apache.avro.generic.GenericDatumReader.readRecord(GenericDatumReader.java:183)
>     at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:151)
>     at
> org.apache.avro.generic.GenericDatumReader.read(GenericDatumReader.java:142)
>     at
> org.apache.avro.io.TestBinaryDecoderSeparateSchema.checkAvroWithoutEmbeddedSchema(TestBinaryDecoderSeparateSchema.java:68)
>
>
> Thanks
> Sachin
>
>

Mime
View raw message