Author: tomwhite Date: Tue Jan 5 00:06:09 2010 New Revision: 895831 URL: http://svn.apache.org/viewvc?rev=895831&view=rev Log: HADOOP-6443. Serialization classes accept invalid metadata. Contributed by Aaron Kimball. Modified: hadoop/common/trunk/CHANGES.txt hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/SerializationBase.java hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java Modified: hadoop/common/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/CHANGES.txt (original) +++ hadoop/common/trunk/CHANGES.txt Tue Jan 5 00:06:09 2010 @@ -80,6 +80,9 @@ HADOOP-6472. add tokenCache option to GenericOptionsParser for passing file with secret keys to a map reduce job. (boryas) + HADOOP-6443. Serialization classes accept invalid metadata. + (Aaron Kimball via tomwhite) + OPTIMIZATIONS BUG FIXES Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java Tue Jan 5 00:06:09 2010 @@ -99,9 +99,7 @@ } public boolean accept(Map metadata) { - String intendedSerializer = metadata.get(SERIALIZATION_KEY); - if (intendedSerializer != null && - !getClass().getName().equals(intendedSerializer)) { + if (!checkSerializationKey(metadata)) { return false; } Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/SerializationBase.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/SerializationBase.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/SerializationBase.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/SerializationBase.java Tue Jan 5 00:06:09 2010 @@ -101,4 +101,17 @@ * for this given metadata. */ public abstract RawComparator getRawComparator(Map metadata); + + /** + * Check that the SERIALIZATION_KEY, if set, matches the current class. + * @param metadata the serialization metadata to check. + * @return true if SERIALIZATION_KEY is unset, or if it matches the current class + * (meaning that accept() should continue processing), or false if it is a mismatch, + * meaning that accept() should return false. + */ + protected boolean checkSerializationKey(Map metadata) { + String intendedSerializer = metadata.get(SERIALIZATION_KEY); + return intendedSerializer == null || + getClass().getName().equals(intendedSerializer); + } } Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java Tue Jan 5 00:06:09 2010 @@ -135,11 +135,10 @@ @Override public boolean accept(Map metadata) { - String intendedSerializer = metadata.get(SERIALIZATION_KEY); - if (intendedSerializer != null && - !getClass().getName().equals(intendedSerializer)) { + if (!checkSerializationKey(metadata)) { return false; } + Class c = getClassFromMetadata(metadata); return c == null ? false : Writable.class.isAssignableFrom(c); } Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java Tue Jan 5 00:06:09 2010 @@ -30,16 +30,18 @@ /** * Serialization for Avro Generic classes. For a class to be accepted by this - * serialization it must have metadata with key - * {@link SerializationBase#SERIALIZATION_KEY} set to {@link AvroGenericSerialization}'s - * fully-qualified classname. + * serialization it must have a schema specified. * The schema used is the one set by {@link AvroSerialization#AVRO_SCHEMA_KEY}. */ @SuppressWarnings("unchecked") public class AvroGenericSerialization extends AvroSerialization { - + @Override public boolean accept(Map metadata) { + if (!checkSerializationKey(metadata)) { + return false; + } + return metadata.get(AVRO_SCHEMA_KEY) != null; } Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java Tue Jan 5 00:06:09 2010 @@ -54,8 +54,8 @@ if (packages == null) { getPackages(); } - if (getClass().getName().equals(metadata.get(SERIALIZATION_KEY))) { - return true; + if (!checkSerializationKey(metadata)) { + return false; } Class c = getClassFromMetadata(metadata); if (c == null) { Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java Tue Jan 5 00:06:09 2010 @@ -141,8 +141,7 @@ * @return a RawComparator parameterized for the specified Avro schema. */ public RawComparator getRawComparator(Map metadata) { - Schema schema = Schema.parse(metadata.get(AVRO_SCHEMA_KEY)); + Schema schema = getSchema(metadata); return new AvroComparator(schema); } - } Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java Tue Jan 5 00:06:09 2010 @@ -39,8 +39,8 @@ @Override public boolean accept(Map metadata) { - if (getClass().getName().equals(metadata.get(SERIALIZATION_KEY))) { - return true; + if (!checkSerializationKey(metadata)) { + return false; } Class c = getClassFromMetadata(metadata); return c == null ? false : SpecificRecord.class.isAssignableFrom(c); Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java (original) +++ hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java Tue Jan 5 00:06:09 2010 @@ -23,15 +23,20 @@ import junit.framework.TestCase; import java.io.IOException; +import java.util.HashMap; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.TestGenericWritable.Foo; import org.apache.hadoop.io.TestGenericWritable.Bar; import org.apache.hadoop.io.TestGenericWritable.Baz; import org.apache.hadoop.io.TestGenericWritable.FooGenericWritable; +import org.apache.hadoop.io.serializer.DeserializerBase; +import org.apache.hadoop.io.serializer.SerializationBase; +import org.apache.hadoop.io.serializer.SerializerBase; import org.apache.hadoop.util.GenericsUtil; public class TestWritableSerialization extends TestCase { @@ -62,6 +67,26 @@ } @SuppressWarnings("unchecked") + public void testIgnoreMisconfiguredMetadata() throws IOException { + // If SERIALIZATION_KEY is set, still need class name. + + Configuration conf = new Configuration(); + Map metadata = new HashMap(); + metadata.put(SerializationBase.SERIALIZATION_KEY, + WritableSerialization.class.getName()); + SerializationFactory factory = new SerializationFactory(conf); + SerializationBase serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + + metadata.put(SerializationBase.CLASS_KEY, + Text.class.getName()); + serialization = factory.getSerialization(metadata); + assertNotNull("Didn't get serialization!", serialization); + assertTrue("Wrong serialization class", + serialization instanceof WritableSerialization); + } + + @SuppressWarnings("unchecked") public void testReuseSerializer() throws IOException { // Test that we can write multiple objects of the same type // through the same serializer. @@ -112,4 +137,46 @@ barSerializer.close(); out.reset(); } + + + // Test the SerializationBase.checkSerializationKey() method. + class DummySerializationBase extends SerializationBase { + public boolean accept(Map metadata) { + return checkSerializationKey(metadata); + } + + public SerializerBase getSerializer(Map metadata) { + return null; + } + + public DeserializerBase getDeserializer(Map metadata) { + return null; + } + + public RawComparator getRawComparator(Map metadata) { + return null; + } + } + + public void testSerializationKeyCheck() { + DummySerializationBase dummy = new DummySerializationBase(); + Map metadata = new HashMap(); + + assertTrue("Didn't accept empty metadata", dummy.accept(metadata)); + + metadata.put(SerializationBase.SERIALIZATION_KEY, + DummySerializationBase.class.getName()); + assertTrue("Didn't accept valid metadata", dummy.accept(metadata)); + + metadata.put(SerializationBase.SERIALIZATION_KEY, "foo"); + assertFalse("Accepted invalid metadata", dummy.accept(metadata)); + + try { + dummy.accept((Map) null); + // Shouldn't get here! + fail("Somehow didn't actually test the method we expected"); + } catch (NullPointerException npe) { + // expected this. + } + } } Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java?rev=895831&r1=895830&r2=895831&view=diff ============================================================================== --- hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java (original) +++ hadoop/common/trunk/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java Tue Jan 5 00:06:09 2010 @@ -26,12 +26,38 @@ import org.apache.avro.util.Utf8; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.serializer.SerializationBase; +import org.apache.hadoop.io.serializer.SerializationFactory; import org.apache.hadoop.io.serializer.SerializationTestUtil; public class TestAvroSerialization extends TestCase { private static final Configuration conf = new Configuration(); + @SuppressWarnings("unchecked") + public void testIgnoreMisconfiguredMetadata() { + // If SERIALIZATION_KEY is set, still need class name. + + Configuration conf = new Configuration(); + Map metadata = new HashMap(); + SerializationFactory factory = new SerializationFactory(conf); + SerializationBase serialization = null; + + metadata.put(SerializationBase.SERIALIZATION_KEY, + AvroGenericSerialization.class.getName()); + serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + + metadata.put(SerializationBase.SERIALIZATION_KEY, + AvroReflectSerialization.class.getName()); + serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + + metadata.put(SerializationBase.SERIALIZATION_KEY, + AvroSpecificSerialization.class.getName()); + serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + } + public void testSpecific() throws Exception { AvroRecord before = new AvroRecord(); before.intField = 5;