avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r1197768 - in /avro/trunk: CHANGES.txt lang/java/avro/src/main/java/org/apache/avro/Schema.java lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
Date Fri, 04 Nov 2011 20:58:36 GMT
Author: cutting
Date: Fri Nov  4 20:58:36 2011
New Revision: 1197768

URL: http://svn.apache.org/viewvc?rev=1197768&view=rev
Log:
AVRO-946. Java: Optimize union resolution when writing.

Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java
    avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
    avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Fri Nov  4 20:58:36 2011
@@ -15,6 +15,8 @@ Avro 1.6.1 (unreleased)
 
   OPTIMIZATIONS
 
+    AVRO-946. Java: Optimize union resolution when writing. (cutting)
+
   IMPROVEMENTS
 
   BUG FIXES

Modified: avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
--- avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java (original)
+++ avro/trunk/lang/java/avro/src/main/java/org/apache/avro/Schema.java Fri Nov  4 20:58:36 2011
@@ -91,6 +91,7 @@ public abstract class Schema {
       INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL;
     private String name;
     private Type() { this.name = this.name().toLowerCase(); }
+    public String getName() { return name; }
   };
 
   private final Type type;
@@ -310,6 +311,11 @@ public abstract class Schema {
     throw new AvroRuntimeException("Not a union: "+this);
   }
 
+  /** If this is a union, return the branch with the provided full name. */
+  public Integer getIndexNamed(String name) {
+    throw new AvroRuntimeException("Not a union: "+this);
+  }
+
   /** If this is fixed, returns its size. */
   public int getFixedSize() {
     throw new AvroRuntimeException("Not fixed: "+this);
@@ -790,37 +796,24 @@ public abstract class Schema {
 
   private static class UnionSchema extends Schema {
     private final List<Schema> types;
+    private final Map<String,Integer> indexByName
+      = new HashMap<String,Integer>();
     public UnionSchema(LockableArrayList<Schema> types) {
       super(Type.UNION);
       this.types = types.lock();
-      int seen = 0;
-      Set<String> seenNames = new HashSet<String>();
-      for (Schema type : types) {                 // check legality of union
-        switch (type.getType()) {
-        case UNION: 
+      int index = 0;
+      for (Schema type : types) {
+        if (type.getType() == Type.UNION)
           throw new AvroRuntimeException("Nested union: "+this);
-        case RECORD:
-        case FIXED:
-        case ENUM:
-          String fullname = type.getFullName();
-          if (fullname != null) {
-            if (seenNames.add(fullname)) {
-              continue;
-            } else {
-              throw new AvroRuntimeException("Duplicate name in union:" + fullname);
-            }
-          } else {
-            throw new AvroRuntimeException("Nameless Record, Fixed, or Enum in union:"+this);
-          }
-        default:
-          int mask = 1 << type.getType().ordinal();
-          if ((seen & mask) != 0)
-            throw new AvroRuntimeException("Ambiguous union: "+this);
-          seen |= mask;
-        }
+        String name = type.getFullName();
+        if (name == null)
+          throw new AvroRuntimeException("Nameless in union:"+this);
+        if (indexByName.put(name, index++) != null)
+          throw new AvroRuntimeException("Duplicate in union:" + name);
       }
     }
     public List<Schema> getTypes() { return types; }
+    public Integer getIndexNamed(String name) { return indexByName.get(name); }
     public boolean equals(Object o) {
       if (o == this) return true;
       if (!(o instanceof UnionSchema)) return false;

Modified: avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
--- avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java (original)
+++ avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java Fri Nov  4 20:58:36 2011
@@ -541,17 +541,46 @@ public class GenericData {
   }
 
   /** Return the index for a datum within a union.  Implemented with {@link
-   * #instanceOf(Schema,Object)}.*/
+   * Schema#getIndexNamed(String)} and {@link #getSchemaName(Schema,Object)}.*/
   public int resolveUnion(Schema union, Object datum) {
-    int i = 0;
-    for (Schema type : union.getTypes()) {
-      if (instanceOf(type, datum))
-        return i;
-      i++;
-    }
+    Integer i = union.getIndexNamed(getSchemaName(datum));
+    if (i != null)
+      return i;
     throw new UnresolvedUnionException(union, datum);
   }
 
+  /** Return the schema full name for a datum.  Called by {@link
+   * #resolveUnion(Schema,Object)}. */
+  protected String getSchemaName(Object datum) {
+    if (datum == null)
+      return Type.NULL.getName();
+    if (isRecord(datum))
+      return getRecordSchema(datum).getFullName();
+    if (isEnum(datum))
+      return getEnumSchema(datum).getFullName();
+    if (isArray(datum))
+      return Type.ARRAY.getName();
+    if (isMap(datum))
+      return Type.MAP.getName();
+    if (isFixed(datum))
+      return getFixedSchema(datum).getFullName();
+    if (isString(datum))
+      return Type.STRING.getName();
+    if (isBytes(datum))
+      return Type.BYTES.getName();
+    if (datum instanceof Integer)
+      return Type.INT.getName();
+    if (datum instanceof Long)
+      return Type.LONG.getName();
+    if (datum instanceof Float)
+      return Type.FLOAT.getName();
+    if (datum instanceof Double)
+      return Type.DOUBLE.getName();
+    if (datum instanceof Boolean)
+      return Type.BOOLEAN.getName();
+    throw new AvroRuntimeException("Unknown datum type: "+datum);
+ }
+
   /** Called by {@link #resolveUnion(Schema,Object)}.  May be overridden for
       alternate data representations.*/
   protected boolean instanceOf(Schema schema, Object datum) {

Modified: avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java?rev=1197768&r1=1197767&r2=1197768&view=diff
==============================================================================
--- avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java (original)
+++ avro/trunk/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java Fri Nov  4 20:58:36 2011
@@ -67,7 +67,7 @@ public class GenericDatumWriter<D> imple
       case ARRAY:  writeArray(schema, datum, out);  break;
       case MAP:    writeMap(schema, datum, out);    break;
       case UNION:
-        int index = data.resolveUnion(schema, datum);
+        int index = resolveUnion(schema, datum);
         out.writeIndex(index);
         write(schema.getTypes().get(index), datum, out);
         break;
@@ -131,6 +131,12 @@ public class GenericDatumWriter<D> imple
     out.writeArrayEnd();
   }
 
+  /** Called to find the index for a datum within a union.  By default calls
+   * {@link GenericData#resolveUnion(Schema,Object)}.*/
+  protected int resolveUnion(Schema union, Object datum) {
+    return data.resolveUnion(union, datum);
+  }
+
   /** Called by the default implementation of {@link #writeArray} to get the
    * size of an array.  The default implementation is for {@link Collection}.*/
   @SuppressWarnings("unchecked")



Mime
View raw message