hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r608738 - in /lucene/hadoop/trunk/src/contrib/hbase: ./ src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/io/ src/test/org/apache/hadoop/hbase/io/
Date Fri, 04 Jan 2008 03:52:53 GMT
Author: stack
Date: Thu Jan  3 19:52:53 2008
New Revision: 608738

URL: http://svn.apache.org/viewvc?rev=608738&view=rev
Log:
HADOOP-2519 Performance improvements: Customized RPC serialization

Added:
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/io/TestHbaseObjectWritable.java
Modified:
    lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java

Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=608738&r1=608737&r2=608738&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Thu Jan  3 19:52:53 2008
@@ -7,6 +7,7 @@
    HADOOP-2079 Fix generated HLog, HRegion names
    HADOOP-2495 Minor performance improvements: Slim-down BatchOperation, etc. 
    HADOOP-2506 Remove the algebra package
+   HADOOP-2519 Performance improvements: Customized RPC serialization
 
   NEW FEATURES
     HADOOP-2061 Add new Base64 dialects

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java?rev=608738&r1=608737&r2=608738&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java Thu Jan  3 19:52:53 2008
@@ -32,9 +32,11 @@
 public interface HMasterInterface extends VersionedProtocol {
   /**
    * Interface version.
-   * Version was incremented to 2 when we brought the hadoop RPC local to hbase.
+   * Version was incremented to 2 when we brought the hadoop RPC local to hbase
+   * -- HADOOP-2495 and then to 3 when we changed the RPC to send codes instead
+   * of actual class names (HADOOP-2519).
    */
-  public static final long versionID = 2L;
+  public static final long versionID = 3L;
 
   /** @return true if master is available */
   public boolean isMasterRunning();
@@ -109,4 +111,4 @@
    * @return address of server that serves the root region
    */
   public HServerAddress findRootRegion();
-}
\ No newline at end of file
+}

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java?rev=608738&r1=608737&r2=608738&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java Thu Jan  3 19:52:53 2008
@@ -18,42 +18,116 @@
 
 package org.apache.hadoop.hbase.io;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
 import java.lang.reflect.Array;
+import java.util.HashMap;
+import java.util.Map;
 
-import java.io.*;
-import java.util.*;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.conf.*;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HMsg;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HServerInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.filter.RowFilterInterface;
+import org.apache.hadoop.hbase.filter.RowFilterSet;
+import org.apache.hadoop.io.MapWritable;
+import org.apache.hadoop.io.ObjectWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableFactories;
 
-/** A polymorphic Writable that writes an instance with it's class name.
- * Handles arrays, strings and primitive types without a Writable wrapper.
- * 
- * This is a copy of the hadoop version renamed.  Removes UTF8 (HADOOP-414).
- * Using Text intead of UTF-8 saves ~2% CPU between reading and writing objects
- * running a short sequentialWrite Performance Evaluation test just in
+/** 
+ * This is a customized version of the polymorphic hadoop
+ * {@link ObjectWritable}.  It removes UTF8 (HADOOP-414).
+ * Using {@link Text} intead of UTF-8 saves ~2% CPU between reading and writing
+ * objects running a short sequentialWrite Performance Evaluation test just in
  * ObjectWritable alone; more when we're doing randomRead-ing.  Other
  * optimizations include our passing codes for classes instead of the
- * actual class names themselves.
- * 
- * <p>Has other optimizations passing codes instead of class names.
+ * actual class names themselves.  This makes it so this class needs amendment
+ * if non-Writable classes are introduced -- if passed a Writable for which we
+ * have no code, we just do the old-school passing of the class name, etc. --
+ * but passing codes the  savings are large particularly when cell
+ * data is small (If < a couple of kilobytes, the encoding/decoding of class
+ * name and reflection to instantiate class was costing in excess of the cell
+ * handling).
  */
 public class HbaseObjectWritable implements Writable, Configurable {
-
-  private Class declaredClass;
+  protected final static Log LOG = LogFactory.getLog(HbaseObjectWritable.class);
+  
+  // Here we maintain two static maps of classes to code and vice versa.
+  // Add new classes+codes as wanted or figure way to auto-generate these
+  // maps from the HMasterInterface.
+  static final Map<Byte, Class<?>> CODE_TO_CLASS =
+    new HashMap<Byte, Class<?>>();
+  static final Map<Class<?>, Byte> CLASS_TO_CODE =
+    new HashMap<Class<?>, Byte>();
+  // Special code that means 'not-encoded'; in this case we do old school
+  // sending of the class name using reflection, etc.
+  private static final byte NOT_ENCODED = 0;
+  static {
+    byte code = NOT_ENCODED + 1;
+    // Primitive types.
+    addToMap(Boolean.TYPE, code++);
+    addToMap(Byte.TYPE, code++);
+    addToMap(Character.TYPE, code++);
+    addToMap(Short.TYPE, code++);
+    addToMap(Integer.TYPE, code++);
+    addToMap(Long.TYPE, code++);
+    addToMap(Float.TYPE, code++);
+    addToMap(Double.TYPE, code++);
+    addToMap(Void.TYPE, code++);
+    // Other java types
+    addToMap(String.class, code++);
+    addToMap(byte [].class, code++);
+    // Hadoop types
+    addToMap(Text.class, code++);
+    addToMap(Writable.class, code++);
+    addToMap(MapWritable.class, code++);
+    addToMap(NullInstance.class, code++);
+    try {
+      addToMap(Class.forName("[Lorg.apache.hadoop.io.Text;"), code++);
+    } catch (ClassNotFoundException e) {
+      e.printStackTrace();
+    }
+    // Hbase types
+    addToMap(HServerInfo.class, code++);
+    addToMap(HMsg.class, code++);
+    addToMap(HTableDescriptor.class, code++);
+    addToMap(HColumnDescriptor.class, code++);
+    addToMap(RowFilterInterface.class, code++);
+    addToMap(RowFilterSet.class, code++);
+    addToMap(HRegionInfo.class, code++);
+    addToMap(BatchUpdate.class, code++);
+    addToMap(HServerAddress.class, code++);
+    addToMap(HRegionInfo.class, code++);
+    try {
+      addToMap(Class.forName("[Lorg.apache.hadoop.hbase.HMsg;"), code++);
+    } catch (ClassNotFoundException e) {
+      e.printStackTrace();
+    }
+  }
+  
+  private Class<?> declaredClass;
   private Object instance;
   private Configuration conf;
 
-  public HbaseObjectWritable() {}
+  public HbaseObjectWritable() {
+    super();
+  }
   
   public HbaseObjectWritable(Object instance) {
     set(instance);
   }
 
-  public HbaseObjectWritable(Class declaredClass, Object instance) {
+  public HbaseObjectWritable(Class<?> declaredClass, Object instance) {
     this.declaredClass = declaredClass;
     this.instance = instance;
   }
@@ -62,7 +136,7 @@
   public Object get() { return instance; }
   
   /** Return the class this is meant to be. */
-  public Class getDeclaredClass() { return declaredClass; }
+  public Class<?> getDeclaredClass() { return declaredClass; }
   
   /** Reset the instance. */
   public void set(Object instance) {
@@ -83,55 +157,54 @@
     writeObject(out, instance, declaredClass, conf);
   }
 
-  private static final Map<String, Class<?>> PRIMITIVE_NAMES = new HashMap<String, Class<?>>();
-  static {
-    PRIMITIVE_NAMES.put("boolean", Boolean.TYPE);
-    PRIMITIVE_NAMES.put("byte", Byte.TYPE);
-    PRIMITIVE_NAMES.put("char", Character.TYPE);
-    PRIMITIVE_NAMES.put("short", Short.TYPE);
-    PRIMITIVE_NAMES.put("int", Integer.TYPE);
-    PRIMITIVE_NAMES.put("long", Long.TYPE);
-    PRIMITIVE_NAMES.put("float", Float.TYPE);
-    PRIMITIVE_NAMES.put("double", Double.TYPE);
-    PRIMITIVE_NAMES.put("void", Void.TYPE);
-  }
-
   private static class NullInstance extends Configured implements Writable {
-    private Class<?> declaredClass;
+    Class<?> declaredClass;
     public NullInstance() { super(null); }
-    public NullInstance(Class declaredClass, Configuration conf) {
+    
+    public NullInstance(Class<?> declaredClass, Configuration conf) {
       super(conf);
       this.declaredClass = declaredClass;
     }
+    
+    @SuppressWarnings("boxing")
     public void readFields(DataInput in) throws IOException {
-      String className = Text.readString(in);
-      declaredClass = PRIMITIVE_NAMES.get(className);
-      if (declaredClass == null) {
-        try {
-          declaredClass = getConf().getClassByName(className);
-        } catch (ClassNotFoundException e) {
-          throw new RuntimeException(e.toString());
-        }
-      }
+      this.declaredClass = CODE_TO_CLASS.get(in.readByte());
     }
+    
     public void write(DataOutput out) throws IOException {
-      Text.writeString(out, declaredClass.getName());
+      writeClassCode(out, this.declaredClass);
+    }
+  }
+  
+  /**
+   * Write out the code byte for passed Class.
+   * @param out
+   * @param c
+   * @throws IOException
+   */
+  @SuppressWarnings("boxing")
+  static void writeClassCode(final DataOutput out, final Class<?> c)
+  throws IOException {
+    Byte code = CLASS_TO_CODE.get(c);
+    if (code == null) {
+      LOG.error("Unsupported type " + c);
+      throw new UnsupportedOperationException("No code for unexpected " + c);
     }
+    out.writeByte(code);
   }
 
   /** Write a {@link Writable}, {@link String}, primitive type, or an array of
    * the preceding. */
+  @SuppressWarnings({ "boxing", "unchecked" })
   public static void writeObject(DataOutput out, Object instance,
                                  Class declaredClass, 
-                                 Configuration conf) throws IOException {
-
+                                 Configuration conf)
+  throws IOException {
     if (instance == null) {                       // null
       instance = new NullInstance(declaredClass, conf);
       declaredClass = Writable.class;
     }
-
-    Text.writeString(out, declaredClass.getName()); // always write declared
-
+    writeClassCode(out, declaredClass);
     if (declaredClass.isArray()) {                // array
       int length = Array.getLength(instance);
       out.writeInt(length);
@@ -139,12 +212,9 @@
         writeObject(out, Array.get(instance, i),
                     declaredClass.getComponentType(), conf);
       }
-      
     } else if (declaredClass == String.class) {   // String
       Text.writeString(out, (String)instance);
-      
     } else if (declaredClass.isPrimitive()) {     // primitive type
-
       if (declaredClass == Boolean.TYPE) {        // boolean
         out.writeBoolean(((Boolean)instance).booleanValue());
       } else if (declaredClass == Character.TYPE) { // char
@@ -168,9 +238,15 @@
     } else if (declaredClass.isEnum()) {         // enum
       Text.writeString(out, ((Enum)instance).name());
     } else if (Writable.class.isAssignableFrom(declaredClass)) { // Writable
-      Text.writeString(out, instance.getClass().getName());
+      Class <?> c = instance.getClass();
+      Byte code = CLASS_TO_CODE.get(c);
+      if (code == null) {
+        out.writeByte(NOT_ENCODED);
+        Text.writeString(out, c.getName());
+      } else {
+        writeClassCode(out, c);
+      }
       ((Writable)instance).write(out);
-
     } else {
       throw new IOException("Can't write: "+instance+" as "+declaredClass);
     }
@@ -186,23 +262,13 @@
     
   /** Read a {@link Writable}, {@link String}, primitive type, or an array of
    * the preceding. */
-  @SuppressWarnings("unchecked")
-  public static Object readObject(DataInput in, HbaseObjectWritable objectWritable, Configuration conf)
-    throws IOException {
-    String className = Text.readString(in);
-    Class<?> declaredClass = PRIMITIVE_NAMES.get(className);
-    if (declaredClass == null) {
-      try {
-        declaredClass = conf.getClassByName(className);
-      } catch (ClassNotFoundException e) {
-        throw new RuntimeException("readObject can't find class", e);
-      }
-    }    
-
+  @SuppressWarnings({ "unchecked", "boxing" })
+  public static Object readObject(DataInput in,
+      HbaseObjectWritable objectWritable, Configuration conf)
+  throws IOException {
+    Class<?> declaredClass = CODE_TO_CLASS.get(in.readByte());
     Object instance;
-    
     if (declaredClass.isPrimitive()) {            // primitive types
-
       if (declaredClass == Boolean.TYPE) {             // boolean
         instance = Boolean.valueOf(in.readBoolean());
       } else if (declaredClass == Character.TYPE) {    // char
@@ -224,43 +290,49 @@
       } else {
         throw new IllegalArgumentException("Not a primitive: "+declaredClass);
       }
-
     } else if (declaredClass.isArray()) {              // array
       int length = in.readInt();
       instance = Array.newInstance(declaredClass.getComponentType(), length);
       for (int i = 0; i < length; i++) {
         Array.set(instance, i, readObject(in, conf));
       }
-      
     } else if (declaredClass == String.class) {        // String
       instance = Text.readString(in);
     } else if (declaredClass.isEnum()) {         // enum
-      instance = Enum.valueOf((Class<? extends Enum>) declaredClass, Text.readString(in));
+      instance = Enum.valueOf((Class<? extends Enum>) declaredClass,
+        Text.readString(in));
     } else {                                      // Writable
-      Class instanceClass = null;
-      try {
-        instanceClass = conf.getClassByName(Text.readString(in));
-      } catch (ClassNotFoundException e) {
-        throw new RuntimeException("readObject can't find class", e);
+      Class<?> instanceClass = null;
+      Byte b = in.readByte();
+      if (b.byteValue() == NOT_ENCODED) {
+        String className = Text.readString(in);
+        try {
+          instanceClass = conf.getClassByName(className);
+        } catch (ClassNotFoundException e) {
+          throw new RuntimeException("Can't find class " + className);
+        }
+      } else {
+        instanceClass = CODE_TO_CLASS.get(b);
       }
-      
       Writable writable = WritableFactories.newInstance(instanceClass, conf);
       writable.readFields(in);
       instance = writable;
-
       if (instanceClass == NullInstance.class) {  // null
         declaredClass = ((NullInstance)instance).declaredClass;
         instance = null;
       }
     }
-
     if (objectWritable != null) {                 // store values
       objectWritable.declaredClass = declaredClass;
       objectWritable.instance = instance;
     }
-
     return instance;
-      
+  }
+
+  @SuppressWarnings("boxing")
+  private static void addToMap(final Class<?> clazz, final byte code) {
+    CLASS_TO_CODE.put(clazz, code);
+    CODE_TO_CLASS.put(code, clazz);
   }
 
   public void setConf(Configuration conf) {
@@ -271,4 +343,4 @@
     return this.conf;
   }
   
-}
+}
\ No newline at end of file

Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/io/TestHbaseObjectWritable.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/io/TestHbaseObjectWritable.java?rev=608738&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/io/TestHbaseObjectWritable.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/io/TestHbaseObjectWritable.java Thu Jan  3 19:52:53 2008
@@ -0,0 +1,89 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.filter.RowFilterInterface;
+import org.apache.hadoop.hbase.filter.StopRowFilter;
+import org.apache.hadoop.io.Text;
+
+public class TestHbaseObjectWritable extends TestCase {
+
+  protected void setUp() throws Exception {
+    super.setUp();
+  }
+
+  protected void tearDown() throws Exception {
+    super.tearDown();
+  }
+
+  @SuppressWarnings("boxing")
+  public void testReadObjectDataInputConfiguration() throws IOException {
+    HBaseConfiguration conf = new HBaseConfiguration();
+    // Do primitive type
+    final int COUNT = 101;
+    assertTrue(doType(conf, COUNT, int.class).equals(COUNT));
+    // Do unsupported type.
+    boolean exception = false;
+    try {
+      doType(conf, new File("a"), File.class);
+    } catch (UnsupportedOperationException uoe) {
+      exception = true;
+    }
+    assertTrue(exception);
+    // Try odd types
+    final byte A = 'A';
+    byte [] bytes = new byte[1];
+    bytes[0] = A;
+    Object obj = doType(conf, bytes, byte [].class);
+    assertTrue(((byte [])obj)[0] == A);
+    // Do 'known' Writable type.
+    obj = doType(conf, new Text(""), Text.class);
+    assertTrue(obj instanceof Text);
+    // Try type that should get transferred old fashion way.
+    obj = doType(conf, new StopRowFilter(new Text("")),
+        RowFilterInterface.class);
+    assertTrue(obj instanceof StopRowFilter);
+  }
+  
+  private Object doType(final HBaseConfiguration conf, final Object value,
+      final Class<?> clazz)
+  throws IOException {
+    ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
+    DataOutputStream out = new DataOutputStream(byteStream);
+    HbaseObjectWritable.writeObject(out, value, clazz, conf);
+    out.close();
+    ByteArrayInputStream bais =
+      new ByteArrayInputStream(byteStream.toByteArray());
+    DataInputStream dis = new DataInputStream(bais);
+    Object product = HbaseObjectWritable.readObject(dis, conf);
+    dis.close();
+    return product;
+  }
+}



Mime
View raw message