Return-Path: Delivered-To: apmail-hadoop-avro-commits-archive@minotaur.apache.org Received: (qmail 32182 invoked from network); 29 Jan 2010 19:12:10 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 29 Jan 2010 19:12:10 -0000 Received: (qmail 4541 invoked by uid 500); 29 Jan 2010 19:12:10 -0000 Delivered-To: apmail-hadoop-avro-commits-archive@hadoop.apache.org Received: (qmail 4511 invoked by uid 500); 29 Jan 2010 19:12:10 -0000 Mailing-List: contact avro-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: avro-dev@hadoop.apache.org Delivered-To: mailing list avro-commits@hadoop.apache.org Received: (qmail 4499 invoked by uid 99); 29 Jan 2010 19:12:10 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 29 Jan 2010 19:12:10 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 29 Jan 2010 19:12:09 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id D341623889EC; Fri, 29 Jan 2010 19:11:48 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r904596 - in /hadoop/avro/trunk: ./ lang/java/src/java/org/apache/avro/io/ lang/java/src/java/org/apache/avro/io/parsing/ lang/java/src/test/java/org/apache/avro/io/ lang/java/src/test/java/org/apache/avro/io/parsing/ Date: Fri, 29 Jan 2010 19:11:47 -0000 To: avro-commits@hadoop.apache.org From: thiru@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100129191148.D341623889EC@eris.apache.org> Author: thiru Date: Fri Jan 29 19:11:45 2010 New Revision: 904596 URL: http://svn.apache.org/viewvc?rev=904596&view=rev Log: AVRO-383. Optiminzing ResolvingDecoder for default values Added: hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/parsing/ hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java Modified: hadoop/avro/trunk/CHANGES.txt hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java Modified: hadoop/avro/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=904596&r1=904595&r2=904596&view=diff ============================================================================== --- hadoop/avro/trunk/CHANGES.txt (original) +++ hadoop/avro/trunk/CHANGES.txt Fri Jan 29 19:11:45 2010 @@ -291,6 +291,8 @@ AVRO-343. Minor fixes to Eclipse config after build re-org (philz) + AVRO-383. Optiminzing ResolvingDecoder for default values (thiru) + BUG FIXES AVRO-176. Safeguard against bad istreams before reading. (sbanacho) Modified: hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java?rev=904596&r1=904595&r2=904596&view=diff ============================================================================== --- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java (original) +++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/ResolvingDecoder.java Fri Jan 29 19:11:45 2010 @@ -165,7 +165,7 @@ } else if (top instanceof Symbol.DefaultStartAction) { Symbol.DefaultStartAction dsa = (Symbol.DefaultStartAction) top; backup = in; - in = (new JsonDecoder(dsa.root, new ByteArrayInputStream(dsa.contents))); + in = (new BinaryDecoder(new ByteArrayInputStream(dsa.contents))); } else if (top == Symbol.DEFAULT_END_ACTION) { in = backup; } else { Modified: hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java?rev=904596&r1=904595&r2=904596&view=diff ============================================================================== --- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java (original) +++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java Fri Jan 29 19:11:45 2010 @@ -17,14 +17,20 @@ */ package org.apache.avro.io.parsing; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.Arrays; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.Encoder; +import org.codehaus.jackson.JsonNode; /** * The class that generates a resolving grammar to resolve between two @@ -258,9 +264,9 @@ Field wf = wfields.get(fname); if (wf == null) { Field rf = rfe.getValue(); - production[--count] = new Symbol.DefaultStartAction( - new JsonGrammarGenerator().generate(rf.schema()), - rf.defaultValue()); + Symbol r = new JsonGrammarGenerator().generate(rf.schema()); + byte[] bb = getBinary(rf.schema(), rf.defaultValue()); + production[--count] = new Symbol.DefaultStartAction(r, bb); production[--count] = super.generate(rf.schema(), seen); production[--count] = Symbol.DEFAULT_END_ACTION; } @@ -269,6 +275,110 @@ return result; } + /** + * Returns the Avro binary encoded version of n according to + * the schema s. + * @param s The schema for encoding + * @param n The Json node that has the value to be encoded. + * @return The binary encoded version of n. + * @throws IOException + */ + private static byte[] getBinary(Schema s, JsonNode n) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Encoder e = new BinaryEncoder(out); + encode(e, s, n); + return out.toByteArray(); + } + + /** + * Encodes the given Json node n on to the encoder e + * according to the schema s. + * @param e The encoder to encode into. + * @param s The schema for the object being encoded. + * @param n The Json node to encode. + * @throws IOException + */ + + static void encode(Encoder e, Schema s, JsonNode n) + throws IOException { + switch (s.getType()) { + case RECORD: + for (Map.Entry entry : s.getFields().entrySet()) { + String name = entry.getKey(); + Field f = entry.getValue(); + JsonNode v = n.get(name); + if (v == null) { + v = f.defaultValue(); + } + if (v == null) { + throw new AvroTypeException("No default value for: " + name); + } + encode(e, f.schema(), v); + } + break; + case ENUM: + e.writeEnum(s.getEnumOrdinal(n.getTextValue())); + break; + case ARRAY: + e.writeArrayStart(); + e.setItemCount(n.size()); + Schema i = s.getElementType(); + for (JsonNode node : n) { + e.startItem(); + encode(e, i, node); + } + e.writeArrayEnd(); + break; + case MAP: + e.writeMapStart(); + e.setItemCount(n.size()); + Schema v = s.getValueType(); + for (Iterator it = n.getFieldNames(); it.hasNext();) { + e.startItem(); + String key = it.next(); + e.writeString(key); + encode(e, v, n.get(key)); + } + e.writeMapEnd(); + break; + case UNION: + e.writeIndex(0); + encode(e, s.getTypes().get(0), n); + break; + case FIXED: + byte[] bb = n.getTextValue().getBytes("ISO-8859-1"); + if (bb.length != s.getFixedSize()) { + bb = Arrays.copyOf(bb, s.getFixedSize()); + } + e.writeFixed(bb); + break; + case STRING: + e.writeString(n.getTextValue()); + break; + case BYTES: + e.writeBytes(n.getTextValue().getBytes("ISO-8859-1")); + break; + case INT: + e.writeInt(n.getIntValue()); + break; + case LONG: + e.writeLong(n.getLongValue()); + break; + case FLOAT: + e.writeFloat((float) n.getDoubleValue()); + break; + case DOUBLE: + e.writeDouble(n.getDoubleValue()); + break; + case BOOLEAN: + e.writeBoolean(n.getBooleanValue()); + break; + case NULL: + e.writeNull(); + break; + } + } + private static Symbol mkEnumAdjust(List rsymbols, List wsymbols){ Object[] adjustments = new Object[wsymbols.size()]; Modified: hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java?rev=904596&r1=904595&r2=904596&view=diff ============================================================================== --- hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java (original) +++ hadoop/avro/trunk/lang/java/src/java/org/apache/avro/io/parsing/Symbol.java Fri Jan 29 19:11:45 2010 @@ -17,8 +17,6 @@ */ package org.apache.avro.io.parsing; -import java.io.ByteArrayOutputStream; -import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -27,11 +25,6 @@ import java.util.NoSuchElementException; import org.apache.avro.Schema; -import org.codehaus.jackson.JsonEncoding; -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonGenerator; -import org.codehaus.jackson.JsonNode; -import org.codehaus.jackson.map.ObjectMapper; /** * Symbol is the base of all symbols (terminals and non-terminals) of @@ -477,15 +470,9 @@ public static class DefaultStartAction extends ImplicitAction { public final Symbol root; public final byte[] contents; - public DefaultStartAction(Symbol root, JsonNode defaultValue) - throws IOException { + public DefaultStartAction(Symbol root, byte[] contents) { this.root = root; - ByteArrayOutputStream os = new ByteArrayOutputStream(); - JsonGenerator g = new JsonFactory().createJsonGenerator(os, - JsonEncoding.UTF8); - new ObjectMapper().writeTree(g, defaultValue); - g.flush(); - this.contents = os.toByteArray(); + this.contents = contents; } } Modified: hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java?rev=904596&r1=904595&r2=904596&view=diff ============================================================================== --- hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java (original) +++ hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/Perf.java Fri Jan 29 19:11:45 2010 @@ -26,6 +26,7 @@ import java.util.Random; import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; /** * Performance tests for various low level operations of @@ -52,6 +53,8 @@ tests.add(new NestedRecordTest()); } else if (a.equals("-S")) { tests.add(new ResolverTest()); + } else if (a.equals("-M")) { + tests.add(new MigrationTest()); } else { usage(); System.exit(1); @@ -281,6 +284,49 @@ } + /** + * Tests the performance of introducing default values. + */ + private static class MigrationTest extends RepeaterTest { + private final Schema readerSchema; + public MigrationTest() throws IOException { + super("MigrationTest"); + readerSchema = Schema.parse( "{ \"type\": \"array\", \"items\":\n" + + "{ \"type\": \"record\", \"name\": \"R\", \"fields\": [\n" + + "{ \"name\": \"f1\", \"type\": \"double\" },\n" + + "{ \"name\": \"f2\", \"type\": \"double\" },\n" + + "{ \"name\": \"f3\", \"type\": \"double\" },\n" + + "{ \"name\": \"f3_1\", \"type\": \"string\", " + + "\"default\": \"undefined\" },\n" + + "{ \"name\": \"f3_2\", \"type\": \"string\"," + + "\"default\": \"undefined\" },\n" + + "{ \"name\": \"f4\", \"type\": \"int\" },\n" + + "{ \"name\": \"f5\", \"type\": \"int\" },\n" + + "{ \"name\": \"f6\", \"type\": \"int\" }\n" + + "] } }"); + } + + @Override + protected Decoder getDecoder() throws IOException { + return new ResolvingDecoder(schema, readerSchema, newDecoder(data)); + } + + @Override + protected void readInternal(Decoder d) throws IOException { + ResolvingDecoder r = (ResolvingDecoder) d; + Field[] ff = r.readFieldOrder(); + for (Field f : ff) { + if (f.pos() < 3) { + r.readDouble(); + } else if (f.pos() >= 5) { + r.readInt(); + } else { + r.readString(null); + } + } + } + } + private static class NestedRecordTest extends ReadInt { public NestedRecordTest() throws IOException { super("NestedRecordTest", Added: hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java?rev=904596&view=auto ============================================================================== --- hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java (added) +++ hadoop/avro/trunk/lang/java/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java Fri Jan 29 19:11:45 2010 @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.io.parsing; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.StringReader; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.ValidatingEncoder; +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.map.ObjectMapper; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestResolvingGrammarGenerator { + private final Schema schema; + private final JsonNode data; + + public TestResolvingGrammarGenerator(String jsonSchema, String jsonData) + throws IOException { + this.schema = Schema.parse(jsonSchema); + JsonFactory factory = new JsonFactory(); + ObjectMapper mapper = new ObjectMapper(factory); + + this.data = mapper.readTree(new StringReader(jsonData)); + } + + @Test + public void test() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Encoder e = new ValidatingEncoder(schema, new BinaryEncoder(baos)); + + ResolvingGrammarGenerator.encode(e, schema, data); + } + + @Parameterized.Parameters + public static Collection data() { + Collection ret = Arrays.asList( + new Object[][] { + { "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + " { \"name\" : \"f1\", \"type\": \"int\" }, " + + " { \"name\" : \"f2\", \"type\": \"float\" } " + + "] } }", + "{ \"f2\": 10.4, \"f1\": 10 } " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + + "[ \"s1\", \"s2\"] } }", " \"s1\" " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + + "[ \"s1\", \"s2\"] } }", " \"s2\" " }, + { "{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", + "\"hello\"" }, + { "{ \"type\": \"array\", \"items\": \"int\" }", + "[ 10, 20, 30 ]" }, + { "{ \"type\": \"map\", \"values\": \"int\" }", + "{ \"k1\": 10, \"k3\": 20, \"k3\": 30 }" }, + { "[ \"int\", \"long\" ]", "10" }, + { "\"string\"", "\"hello\"" }, + { "\"bytes\"", "\"hello\"" }, + { "\"int\"", "10" }, + { "\"long\"", "10" }, + { "\"float\"", "10.0" }, + { "\"double\"", "10.0" }, + { "\"boolean\"", "true" }, + { "\"boolean\"", "false" }, + { "\"null\"", "null" }, + } + ); + return ret; + } + +}