hive-commits mailing list archives

From omal...@apache.org
Subject [07/37] hive git commit: HIVE-17118. Move the hive-orc source files to make the package names unique.
Date Wed, 19 Jul 2017 16:58:30 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
new file mode 100644
index 0000000..8a6337d
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestRunLengthIntegerReader.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static junit.framework.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+public class TestRunLengthIntegerReader {
+
+  public void runSeekTest(CompressionCodec codec) throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 1000, codec, collect), true);
+    TestInStream.PositionCollector[] positions =
+        new TestInStream.PositionCollector[4096];
+    Random random = new Random(99);
+    int[] junk = new int[2048];
+    for(int i=0; i < junk.length; ++i) {
+      junk[i] = random.nextInt();
+    }
+    for(int i=0; i < 4096; ++i) {
+      positions[i] = new TestInStream.PositionCollector();
+      out.getPosition(positions[i]);
+      // test runs, incrementing runs, non-runs
+      if (i < 1024) {
+        out.write(i/4);
+      } else if (i < 2048) {
+        out.write(2*i);
+      } else {
+        out.write(junk[i-2048]);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+            codec, 1000), true);
+    for(int i=0; i < 4096; ++i) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+    for(int i=4095; i >= 0; --i) {
+      in.seek(positions[i]);
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i/4, x);
+      } else if (i < 2048) {
+        assertEquals(2*i, x);
+      } else {
+        assertEquals(junk[i-2048], x);
+      }
+    }
+  }
+
+  @Test
+  public void testUncompressedSeek() throws Exception {
+    runSeekTest(null);
+  }
+
+  @Test
+  public void testCompressedSeek() throws Exception {
+    runSeekTest(new ZlibCodec());
+  }
+
+  @Test
+  public void testSkips() throws Exception {
+    TestInStream.OutputCollector collect = new TestInStream.OutputCollector();
+    RunLengthIntegerWriter out = new RunLengthIntegerWriter(
+        new OutStream("test", 100, null, collect), true);
+    for(int i=0; i < 2048; ++i) {
+      if (i < 1024) {
+        out.write(i);
+      } else {
+        out.write(256 * i);
+      }
+    }
+    out.flush();
+    ByteBuffer inBuf = ByteBuffer.allocate(collect.buffer.size());
+    collect.buffer.setByteBuffer(inBuf, 0, collect.buffer.size());
+    inBuf.flip();
+    RunLengthIntegerReader in = new RunLengthIntegerReader(InStream.create
+        ("test", new ByteBuffer[]{inBuf}, new long[]{0}, inBuf.remaining(),
+            null, 100), true);
+    for(int i=0; i < 2048; i += 10) {
+      int x = (int) in.next();
+      if (i < 1024) {
+        assertEquals(i, x);
+      } else {
+        assertEquals(256 * i, x);
+      }
+      if (i < 2038) {
+        in.skip(9);
+      }
+      in.skip(0);
+    }
+  }
+}
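
For context on what runSeekTest exercises: RunLengthIntegerWriter/RunLengthIntegerReader implement ORC's RLE version 1 framing, which (per the ORC spec) encodes signed integers as either a run (a control byte of 0..127 meaning "control + 3" values, followed by a signed one-byte delta and a zigzag varint base) or a literal sequence (a negative control byte giving the count, followed by that many zigzag varints). A minimal decoder sketch of that framing follows; the class and helper names are illustrative, not the project's API:

    import java.io.IOException;
    import java.io.InputStream;
    import java.util.function.LongConsumer;

    class RleV1DecoderSketch {
      static void decode(InputStream in, LongConsumer sink) throws IOException {
        int header;
        while ((header = in.read()) != -1) {
          if (header < 128) {               // run: header + 3 values
            int length = header + 3;
            int delta = (byte) in.read();   // signed one-byte delta
            long base = readSignedVarint(in);
            for (int i = 0; i < length; ++i) {
              sink.accept(base + (long) i * delta);
            }
          } else {                          // literals: 256 - header values
            int length = 256 - header;
            for (int i = 0; i < length; ++i) {
              sink.accept(readSignedVarint(in));
            }
          }
        }
      }

      static long readSignedVarint(InputStream in) throws IOException {
        long result = 0;
        int shift = 0, b;
        do {
          b = in.read();
          result |= (long) (b & 0x7f) << shift;
          shift += 7;
        } while ((b & 0x80) != 0);
        return (result >>> 1) ^ -(result & 1);  // undo zigzag encoding
      }
    }

The three write phases of the test map onto those frames: i/4 produces runs with delta 0, 2*i produces incrementing runs with delta 2, and the random junk falls back to literal sequences.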

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java b/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
new file mode 100644
index 0000000..cc963c8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestSchemaEvolution.java
@@ -0,0 +1,480 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.RecordReader;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.apache.hive.orc.Reader;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+public class TestSchemaEvolution {
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  Configuration conf;
+  Path testFilePath;
+  FileSystem fs;
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  @Before
+  public void setup() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testDataTypeConversion1() throws IOException {
+    TypeDescription fileStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
+    assertFalse(same1.hasConversion());
+    TypeDescription readerStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+    assertFalse(both1.hasConversion());
+    TypeDescription readerStruct1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
+    assertTrue(both1diff.hasConversion());
+    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
+    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision, null);
+    assertTrue(both1diffPrecision.hasConversion());
+  }
+
+  @Test
+  public void testDataTypeConversion2() throws IOException {
+    TypeDescription fileStruct2 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    SchemaEvolution same2 = new SchemaEvolution(fileStruct2, null);
+    assertFalse(same2.hasConversion());
+    TypeDescription readerStruct2 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    SchemaEvolution both2 = new SchemaEvolution(fileStruct2, readerStruct2, null);
+    assertFalse(both2.hasConversion());
+    TypeDescription readerStruct2diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createByte()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(100));
+    SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, null);
+    assertTrue(both2diff.hasConversion());
+    TypeDescription readerStruct2diffChar = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createUnion()
+            .addUnionChild(TypeDescription.createByte())
+            .addUnionChild(TypeDescription.createDecimal()
+                .withPrecision(20).withScale(10)))
+        .addField("f2", TypeDescription.createStruct()
+            .addField("f3", TypeDescription.createDate())
+            .addField("f4", TypeDescription.createDouble())
+            .addField("f5", TypeDescription.createBoolean()))
+        .addField("f6", TypeDescription.createChar().withMaxLength(80));
+    SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, null);
+    assertTrue(both2diffChar.hasConversion());
+  }
+
+  @Test
+  public void testFloatToDoubleEvolution() throws Exception {
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    TypeDescription schema = TypeDescription.createFloat();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
+    DoubleColumnVector dcv = new DoubleColumnVector(1024);
+    batch.cols[0] = dcv;
+    batch.reset();
+    batch.size = 1;
+    dcv.vector[0] = 74.72f;
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+    TypeDescription schemaOnRead = TypeDescription.createDouble();
+    RecordReader rows = reader.rows(new Reader.Options().schema(schemaOnRead));
+    batch = schemaOnRead.createRowBatch();
+    rows.nextBatch(batch);
+    assertEquals(74.72, ((DoubleColumnVector) batch.cols[0]).vector[0], 0.00000000001);
+    rows.close();
+  }
+
+  @Test
+  public void testSafePpdEvaluation() throws IOException {
+    TypeDescription fileStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution same1 = new SchemaEvolution(fileStruct1, null);
+    assertTrue(same1.isPPDSafeConversion(0));
+    assertFalse(same1.hasConversion());
+    TypeDescription readerStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+    assertFalse(both1.hasConversion());
+    assertTrue(both1.isPPDSafeConversion(0));
+    assertTrue(both1.isPPDSafeConversion(1));
+    assertTrue(both1.isPPDSafeConversion(2));
+    assertTrue(both1.isPPDSafeConversion(3));
+
+    // int -> long
+    TypeDescription readerStruct1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
+    SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, null);
+    assertTrue(both1diff.hasConversion());
+    assertFalse(both1diff.isPPDSafeConversion(0));
+    assertTrue(both1diff.isPPDSafeConversion(1));
+    assertTrue(both1diff.isPPDSafeConversion(2));
+    assertTrue(both1diff.isPPDSafeConversion(3));
+
+    // decimal(38,10) -> decimal(12, 10)
+    TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(12).withScale(10));
+    SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1, readerStruct1diffPrecision,
+        new boolean[] {true, false, false, true});
+    assertTrue(both1diffPrecision.hasConversion());
+    assertFalse(both1diffPrecision.isPPDSafeConversion(0));
+    assertFalse(both1diffPrecision.isPPDSafeConversion(1)); // column not included
+    assertFalse(both1diffPrecision.isPPDSafeConversion(2)); // column not included
+    assertFalse(both1diffPrecision.isPPDSafeConversion(3));
+
+    // add columns
+    readerStruct1 = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString())
+        .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10))
+        .addField("f4", TypeDescription.createBoolean());
+    both1 = new SchemaEvolution(fileStruct1, readerStruct1, null);
+    assertTrue(both1.hasConversion());
+    assertFalse(both1.isPPDSafeConversion(0));
+    assertTrue(both1.isPPDSafeConversion(1));
+    assertTrue(both1.isPPDSafeConversion(2));
+    assertTrue(both1.isPPDSafeConversion(3));
+    assertFalse(both1.isPPDSafeConversion(4));
+  }
+
+  @Test
+  public void testSafePpdEvaluationForInts() throws IOException {
+    // byte -> short -> int -> long
+    TypeDescription fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertFalse(schemaEvolution.hasConversion());
+
+    // byte -> short
+    TypeDescription readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // byte -> int
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // byte -> long
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // short -> int -> long
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertFalse(schemaEvolution.hasConversion());
+
+    // unsafe conversion short -> byte
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // short -> int
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // short -> long
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // int -> long
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertFalse(schemaEvolution.hasConversion());
+
+    // unsafe conversion int -> byte
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // unsafe conversion int -> short
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // int -> long
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // long
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // unsafe conversion long -> byte
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // unsafe conversion long -> short
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // unsafe conversion long -> int
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createFloat());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createTimestamp());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+  }
+
+  @Test
+  public void testSafePpdEvaluationForStrings() throws IOException {
+    TypeDescription fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    SchemaEvolution schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // string -> char
+    TypeDescription readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // string -> varchar
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // char -> string
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // char -> varchar
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    fileSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar());
+    schemaEvolution = new SchemaEvolution(fileSchema, null);
+    assertTrue(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.hasConversion());
+
+    // varchar -> string
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertTrue(schemaEvolution.isPPDSafeConversion(1));
+
+    // varchar -> char
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createDecimal());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createDate());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+
+    // invalid
+    readerSchema = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt());
+    schemaEvolution = new SchemaEvolution(fileSchema, readerSchema, null);
+    assertTrue(schemaEvolution.hasConversion());
+    assertFalse(schemaEvolution.isPPDSafeConversion(0));
+    assertFalse(schemaEvolution.isPPDSafeConversion(1));
+  }
+
+  @Test
+  public void ensureFileIncluded() throws IOException {
+    TypeDescription file = TypeDescription.fromString("struct<x:int,y:int>");
+    SchemaEvolution evolution = new SchemaEvolution(file, null);
+    boolean[] include = evolution.getFileIncluded();
+    assertEquals(3, include.length);
+    for(int i=0; i < include.length; ++i) {
+      assertTrue("element " + i, include[i]);
+    }
+  }
+}
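
The isPPDSafeConversion checks above decide which predicate pushdowns stay valid under a type change; actually reading with an evolved schema goes through the same Reader.Options().schema(...) hook that testFloatToDoubleEvolution uses. Below is a minimal sketch of that pattern for the int -> long widening called safe above, assuming the same pre-relocation org.apache.hive.orc API (imports as in the test, plus LongColumnVector); method and variable names are illustrative:

    static void readIntAsLong(Configuration conf, FileSystem fs, Path path)
        throws IOException {
      TypeDescription fileSchema = TypeDescription.fromString("struct<x:int>");
      Writer writer = OrcFile.createWriter(path,
          OrcFile.writerOptions(conf).fileSystem(fs).setSchema(fileSchema));
      VectorizedRowBatch batch = fileSchema.createRowBatch();
      ((LongColumnVector) batch.cols[0]).vector[0] = 42;
      batch.size = 1;
      writer.addRowBatch(batch);
      writer.close();

      Reader reader = OrcFile.createReader(path,
          OrcFile.readerOptions(conf).filesystem(fs));
      TypeDescription readSchema = TypeDescription.fromString("struct<x:bigint>");
      RecordReader rows = reader.rows(new Reader.Options().schema(readSchema));
      VectorizedRowBatch out = readSchema.createRowBatch();
      rows.nextBatch(out);  // x comes back widened in a LongColumnVector
      rows.close();
    }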

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java b/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
new file mode 100644
index 0000000..5bcee60
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestSerializationUtils.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.orc.impl;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.math.BigInteger;
+import java.util.Random;
+
+import org.junit.Test;
+
+import com.google.common.math.LongMath;
+
+public class TestSerializationUtils {
+
+  private InputStream fromBuffer(ByteArrayOutputStream buffer) {
+    return new ByteArrayInputStream(buffer.toByteArray());
+  }
+
+  @Test
+  public void testDoubles() throws Exception {
+    double tolerance = 0.0000000000000001;
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils utils = new SerializationUtils();
+    utils.writeDouble(buffer, 1343822337.759);
+    assertEquals(1343822337.759, utils.readDouble(fromBuffer(buffer)), tolerance);
+    buffer = new ByteArrayOutputStream();
+    utils.writeDouble(buffer, 0.8);
+    double got = utils.readDouble(fromBuffer(buffer));
+    assertEquals(0.8, got, tolerance);
+  }
+
+  @Test
+  public void testBigIntegers() throws Exception {
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(0));
+    assertArrayEquals(new byte[]{0}, buffer.toByteArray());
+    assertEquals(0L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(1));
+    assertArrayEquals(new byte[]{2}, buffer.toByteArray());
+    assertEquals(1L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-1));
+    assertArrayEquals(new byte[]{1}, buffer.toByteArray());
+    assertEquals(-1L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(50));
+    assertArrayEquals(new byte[]{100}, buffer.toByteArray());
+    assertEquals(50L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(-50));
+    assertArrayEquals(new byte[]{99}, buffer.toByteArray());
+    assertEquals(-50L,
+        SerializationUtils.readBigInteger(fromBuffer(buffer)).longValue());
+    for(int i=-8192; i < 8192; ++i) {
+      buffer.reset();
+      SerializationUtils.writeBigInteger(buffer, BigInteger.valueOf(i));
+      assertEquals("compare length for " + i,
+          i >= -64 && i < 64 ? 1 : 2, buffer.size());
+      assertEquals("compare result for " + i,
+          i, SerializationUtils.readBigInteger(fromBuffer(buffer)).intValue());
+    }
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger("123456789abcdef0",16));
+    assertEquals(new BigInteger("123456789abcdef0",16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger("-123456789abcdef0",16));
+    assertEquals(new BigInteger("-123456789abcdef0",16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+    StringBuilder buf = new StringBuilder();
+    for(int i=0; i < 256; ++i) {
+      String num = Integer.toHexString(i);
+      if (num.length() == 1) {
+        buf.append('0');
+      }
+      buf.append(num);
+    }
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger(buf.toString(),16));
+    assertEquals(new BigInteger(buf.toString(),16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+    buffer.reset();
+    SerializationUtils.writeBigInteger(buffer,
+        new BigInteger("ff000000000000000000000000000000000000000000ff",16));
+    assertEquals(
+        new BigInteger("ff000000000000000000000000000000000000000000ff",16),
+        SerializationUtils.readBigInteger(fromBuffer(buffer)));
+  }
+
+  @Test
+  public void testSubtractionOverflow() {
+    // cross check results with Guava results below
+    SerializationUtils utils = new SerializationUtils();
+    assertEquals(false, utils.isSafeSubtract(22222222222L, Long.MIN_VALUE));
+    assertEquals(false, utils.isSafeSubtract(-22222222222L, Long.MAX_VALUE));
+    assertEquals(false, utils.isSafeSubtract(Long.MIN_VALUE, Long.MAX_VALUE));
+    assertEquals(true, utils.isSafeSubtract(-1553103058346370095L, 6553103058346370095L));
+    assertEquals(true, utils.isSafeSubtract(0, Long.MAX_VALUE));
+    assertEquals(true, utils.isSafeSubtract(Long.MIN_VALUE, 0));
+  }
+
+  @Test
+  public void testSubtractionOverflowGuava() {
+    try {
+      LongMath.checkedSubtract(22222222222L, Long.MIN_VALUE);
+      fail("expected ArithmeticException for overflow");
+    } catch (ArithmeticException ex) {
+      assertEquals("overflow", ex.getMessage());
+    }
+
+    try {
+      LongMath.checkedSubtract(-22222222222L, Long.MAX_VALUE);
+      fail("expected ArithmeticException for overflow");
+    } catch (ArithmeticException ex) {
+      assertEquals("overflow", ex.getMessage());
+    }
+
+    try {
+      LongMath.checkedSubtract(Long.MIN_VALUE, Long.MAX_VALUE);
+      fail("expected ArithmeticException for overflow");
+    } catch (ArithmeticException ex) {
+      assertEquals("overflow", ex.getMessage());
+    }
+
+    assertEquals(-8106206116692740190L,
+        LongMath.checkedSubtract(-1553103058346370095L, 6553103058346370095L));
+    assertEquals(-Long.MAX_VALUE, LongMath.checkedSubtract(0, Long.MAX_VALUE));
+    assertEquals(Long.MIN_VALUE, LongMath.checkedSubtract(Long.MIN_VALUE, 0));
+  }
+
+  @Test
+  public void testRandomFloats() throws Exception {
+    float tolerance = 0.0000000000000001f;
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils utils = new SerializationUtils();
+    Random rand = new Random();
+    int n = 100_000;
+    float[] expected = new float[n];
+    for (int i = 0; i < n; i++) {
+      float f = rand.nextFloat();
+      expected[i] = f;
+      utils.writeFloat(buffer, f);
+    }
+    InputStream newBuffer = fromBuffer(buffer);
+    for (int i = 0; i < n; i++) {
+      float got = utils.readFloat(newBuffer);
+      assertEquals(expected[i], got, tolerance);
+    }
+  }
+
+  @Test
+  public void testRandomDoubles() throws Exception {
+    double tolerance = 0.0000000000000001;
+    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+    SerializationUtils utils = new SerializationUtils();
+    Random rand = new Random();
+    int n = 100_000;
+    double[] expected = new double[n];
+    for (int i = 0; i < n; i++) {
+      double d = rand.nextDouble();
+      expected[i] = d;
+      utils.writeDouble(buffer, d);
+    }
+    InputStream newBuffer = fromBuffer(buffer);
+    for (int i = 0; i < n; i++) {
+      double got = utils.readDouble(newBuffer);
+      assertEquals(expected[i], got, tolerance);
+    }
+  }
+}
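
The expected byte arrays in testBigIntegers follow directly from zigzag mapping (n -> (n << 1) ^ (n >> 63)) applied before the base-128 varint is written: 0 -> 0, 1 -> 2, -1 -> 1, 50 -> 100, -50 -> 99, and every value in [-64, 63] maps into [0, 127], which is exactly why the -8192..8191 loop expects a single byte in that window and two bytes outside it. A worked sketch of the mapping (the helper names are ours, not SerializationUtils methods):

    public class ZigZagSketch {
      static long zigzag(long n)   { return (n << 1) ^ (n >> 63); }
      static long unzigzag(long z) { return (z >>> 1) ^ -(z & 1); }

      public static void main(String[] args) {
        for (long n : new long[]{0, 1, -1, 50, -50}) {
          // prints 0 -> 0, 1 -> 2, -1 -> 1, 50 -> 100, -50 -> 99,
          // matching the single-byte buffers asserted above
          System.out.println(n + " -> " + zigzag(n)
              + " -> " + unzigzag(zigzag(n)));
        }
      }
    }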

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java b/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
new file mode 100644
index 0000000..4aed06c
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestStreamName.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.OrcProto;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestStreamName {
+
+  @Test
+  public void test1() throws Exception {
+    StreamName s1 = new StreamName(3, OrcProto.Stream.Kind.DATA);
+    StreamName s2 = new StreamName(3,
+        OrcProto.Stream.Kind.DICTIONARY_DATA);
+    StreamName s3 = new StreamName(5, OrcProto.Stream.Kind.DATA);
+    StreamName s4 = new StreamName(5,
+        OrcProto.Stream.Kind.DICTIONARY_DATA);
+    StreamName s1p = new StreamName(3, OrcProto.Stream.Kind.DATA);
+    assertEquals(true, s1.equals(s1));
+    assertEquals(false, s1.equals(s2));
+    assertEquals(false, s1.equals(s3));
+    assertEquals(true, s1.equals(s1p));
+    assertEquals(true, s1.compareTo(null) < 0);
+    assertEquals(false, s1.equals(null));
+    assertEquals(true, s1.compareTo(s2) < 0);
+    assertEquals(true, s2.compareTo(s3) < 0);
+    assertEquals(true, s3.compareTo(s4) < 0);
+    assertEquals(true, s4.compareTo(s1p) > 0);
+    assertEquals(0, s1p.compareTo(s1));
+  }
+}
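
The assertions pin down StreamName's ordering: the column number dominates (s2, column 3, sorts before s3, column 5, despite s2's "larger" kind), the stream kind's enum order breaks ties (DATA before DICTIONARY_DATA), and null sorts after any stream. A simplified comparator consistent with those cases; the real implementation may additionally group index streams ahead of data streams, a distinction these same-area cases never exercise:

    static int compare(int colA, OrcProto.Stream.Kind kindA,
                       int colB, OrcProto.Stream.Kind kindB) {
      if (colA != colB) {
        return Integer.compare(colA, colB);
      }
      return kindA.compareTo(kindB);  // enum order: DATA < DICTIONARY_DATA
    }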

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java b/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
new file mode 100644
index 0000000..c3e51b8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestStringRedBlackTree.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.IntWritable;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static junit.framework.Assert.assertEquals;
+
+/**
+ * Test the red-black tree with string keys.
+ */
+public class TestStringRedBlackTree {
+
+  /**
+   * Checks the red-black tree rules to make sure that we have correctly built
+   * a valid tree.
+   *
+   * Properties:
+   *   1. Red nodes must have black children
+   *   2. Each node must have the same black height on both sides.
+   *
+   * @param node The id of the root of the subtree to check for the red-black
+   *        tree properties.
+   * @return The black-height of the subtree.
+   */
+  private int checkSubtree(RedBlackTree tree, int node, IntWritable count
+                          ) throws IOException {
+    if (node == RedBlackTree.NULL) {
+      return 1;
+    }
+    count.set(count.get() + 1);
+    boolean is_red = tree.isRed(node);
+    int left = tree.getLeft(node);
+    int right = tree.getRight(node);
+    if (is_red) {
+      if (tree.isRed(left)) {
+        printTree(tree, "", tree.root);
+        throw new IllegalStateException("Left node of " + node + " is " + left +
+          " and both are red.");
+      }
+      if (tree.isRed(right)) {
+        printTree(tree, "", tree.root);
+        throw new IllegalStateException("Right node of " + node + " is " +
+          right + " and both are red.");
+      }
+    }
+    int left_depth = checkSubtree(tree, left, count);
+    int right_depth = checkSubtree(tree, right, count);
+    if (left_depth != right_depth) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("Lopsided tree at node " + node +
+        " with depths " + left_depth + " and " + right_depth);
+    }
+    if (is_red) {
+      return left_depth;
+    } else {
+      return left_depth + 1;
+    }
+  }
+
+  /**
+   * Checks the validity of the entire tree. Also ensures that the number of
+   * nodes visited is the same as the size of the set.
+   */
+  void checkTree(RedBlackTree tree) throws IOException {
+    IntWritable count = new IntWritable(0);
+    if (tree.isRed(tree.root)) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("root is red");
+    }
+    checkSubtree(tree, tree.root, count);
+    if (count.get() != tree.size) {
+      printTree(tree, "", tree.root);
+      throw new IllegalStateException("Broken tree! visited= " + count.get() +
+        " size=" + tree.size);
+    }
+  }
+
+  void printTree(RedBlackTree tree, String indent, int node
+                ) throws IOException {
+    if (node == RedBlackTree.NULL) {
+      System.err.println(indent + "NULL");
+    } else {
+      System.err.println(indent + "Node " + node + " color " +
+        (tree.isRed(node) ? "red" : "black"));
+      printTree(tree, indent + "  ", tree.getLeft(node));
+      printTree(tree, indent + "  ", tree.getRight(node));
+    }
+  }
+
+  private static class MyVisitor implements StringRedBlackTree.Visitor {
+    private final String[] words;
+    private final int[] order;
+    private final DataOutputBuffer buffer = new DataOutputBuffer();
+    int current = 0;
+
+    MyVisitor(String[] args, int[] order) {
+      words = args;
+      this.order = order;
+    }
+
+    @Override
+    public void visit(StringRedBlackTree.VisitorContext context
+                     ) throws IOException {
+      String word = context.getText().toString();
+      assertEquals("in word " + current, words[current], word);
+      assertEquals("in word " + current, order[current],
+        context.getOriginalPosition());
+      buffer.reset();
+      context.writeBytes(buffer);
+      assertEquals(word, new String(buffer.getData(),0,buffer.getLength()));
+      current += 1;
+    }
+  }
+
+  void checkContents(StringRedBlackTree tree, int[] order,
+                     String... params
+                    ) throws IOException {
+    tree.visit(new MyVisitor(params, order));
+  }
+
+  StringRedBlackTree buildTree(String... params) throws IOException {
+    StringRedBlackTree result = new StringRedBlackTree(1000);
+    for(String word: params) {
+      result.add(word);
+      checkTree(result);
+    }
+    return result;
+  }
+
+  @Test
+  public void test1() throws Exception {
+    StringRedBlackTree tree = new StringRedBlackTree(5);
+    assertEquals(0, tree.getSizeInBytes());
+    checkTree(tree);
+    assertEquals(0, tree.add("owen"));
+    checkTree(tree);
+    assertEquals(1, tree.add("ashutosh"));
+    checkTree(tree);
+    assertEquals(0, tree.add("owen"));
+    checkTree(tree);
+    assertEquals(2, tree.add("alan"));
+    checkTree(tree);
+    assertEquals(2, tree.add("alan"));
+    checkTree(tree);
+    assertEquals(1, tree.add("ashutosh"));
+    checkTree(tree);
+    assertEquals(3, tree.add("greg"));
+    checkTree(tree);
+    assertEquals(4, tree.add("eric"));
+    checkTree(tree);
+    assertEquals(5, tree.add("arun"));
+    checkTree(tree);
+    assertEquals(6, tree.size());
+    checkTree(tree);
+    assertEquals(6, tree.add("eric14"));
+    checkTree(tree);
+    assertEquals(7, tree.add("o"));
+    checkTree(tree);
+    assertEquals(8, tree.add("ziggy"));
+    checkTree(tree);
+    assertEquals(9, tree.add("z"));
+    checkTree(tree);
+    checkContents(tree, new int[]{2,5,1,4,6,3,7,0,9,8},
+      "alan", "arun", "ashutosh", "eric", "eric14", "greg",
+      "o", "owen", "z", "ziggy");
+    assertEquals(32888, tree.getSizeInBytes());
+    // check that adding greg again bumps the count
+    assertEquals(3, tree.add("greg"));
+    assertEquals(41, tree.getCharacterSize());
+    // add some more strings to test the different branches of the
+    // rebalancing
+    assertEquals(10, tree.add("zak"));
+    checkTree(tree);
+    assertEquals(11, tree.add("eric1"));
+    checkTree(tree);
+    assertEquals(12, tree.add("ash"));
+    checkTree(tree);
+    assertEquals(13, tree.add("harry"));
+    checkTree(tree);
+    assertEquals(14, tree.add("john"));
+    checkTree(tree);
+    tree.clear();
+    checkTree(tree);
+    assertEquals(0, tree.getSizeInBytes());
+    assertEquals(0, tree.getCharacterSize());
+  }
+
+  @Test
+  public void test2() throws Exception {
+    StringRedBlackTree tree =
+      buildTree("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
+        "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+    assertEquals(26, tree.size());
+    checkContents(tree, new int[]{0,1,2, 3,4,5, 6,7,8, 9,10,11, 12,13,14,
+      15,16,17, 18,19,20, 21,22,23, 24,25},
+      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j","k", "l", "m", "n", "o",
+      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+  }
+
+  @Test
+  public void test3() throws Exception {
+    StringRedBlackTree tree =
+      buildTree("z", "y", "x", "w", "v", "u", "t", "s", "r", "q", "p", "o", "n",
+        "m", "l", "k", "j", "i", "h", "g", "f", "e", "d", "c", "b", "a");
+    assertEquals(26, tree.size());
+    checkContents(tree, new int[]{25,24,23, 22,21,20, 19,18,17, 16,15,14,
+      13,12,11, 10,9,8, 7,6,5, 4,3,2, 1,0},
+      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
+      "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z");
+  }
+}
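
Beyond validating the red-black invariants, these tests document the tree's role as the writer's string dictionary: add() returns a stable id per distinct string (re-adding "owen" returns 0, re-adding "greg" returns 3), and visit() walks entries in sorted order while reporting each entry's original insertion position, which is what the int[] order arrays assert. A brief usage sketch under those semantics:

    StringRedBlackTree dict = new StringRedBlackTree(16);
    int owen = dict.add("owen");          // 0: first distinct string
    int ashutosh = dict.add("ashutosh");  // 1: second distinct string
    int again = dict.add("owen");         // 0 again: duplicates keep their id
    // sorted traversal visits "ashutosh" (original position 1), then "owen" (0)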

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/impl/TestZlib.java b/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
new file mode 100644
index 0000000..c87f4a8
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/impl/TestZlib.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.impl;
+
+import org.apache.hive.orc.CompressionCodec;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.fail;
+
+public class TestZlib {
+
+  @Test
+  public void testNoOverflow() throws Exception {
+    ByteBuffer in = ByteBuffer.allocate(10);
+    ByteBuffer out = ByteBuffer.allocate(10);
+    in.put(new byte[]{1,2,3,4,5,6,7,10});
+    in.flip();
+    CompressionCodec codec = new ZlibCodec();
+    assertEquals(false, codec.compress(in, out, null));
+  }
+
+  @Test
+  public void testCorrupt() throws Exception {
+    ByteBuffer buf = ByteBuffer.allocate(1000);
+    buf.put(new byte[]{127,-128,0,99,98,-1});
+    buf.flip();
+    CompressionCodec codec = new ZlibCodec();
+    ByteBuffer out = ByteBuffer.allocate(1000);
+    try {
+      codec.decompress(buf, out);
+      fail();
+    } catch (IOException ioe) {
+      // EXPECTED
+    }
+  }
+}
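
Note the contract testNoOverflow relies on: compress() returns false when the compressed form will not fit in the output buffer, rather than throwing, so a caller can fall back to storing the bytes uncompressed. A caller-side sketch of that pattern; writeChunk is a hypothetical helper, not part of the codec API:

    ByteBuffer out = ByteBuffer.allocate(in.remaining());
    CompressionCodec codec = new ZlibCodec();
    if (codec.compress(in, out, null)) {
      writeChunk(out, /* original = */ false);  // compressed smaller: keep it
    } else {
      writeChunk(in, /* original = */ true);    // incompressible: emit as-is
    }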

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java b/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
new file mode 100644
index 0000000..50e6208
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/tools/TestFileDump.java
@@ -0,0 +1,485 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFileDump {
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Before
+  public void openFileSystem () throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    testFilePath = new Path("TestFileDump.testDump.orc");
+    fs.delete(testFilePath, false);
+  }
+
+  static TypeDescription getMyRecordType() {
+    return TypeDescription.createStruct()
+        .addField("i", TypeDescription.createInt())
+        .addField("l", TypeDescription.createLong())
+        .addField("s", TypeDescription.createString());
+  }
+
+  static void appendMyRecord(VectorizedRowBatch batch,
+                             int i,
+                             long l,
+                             String str) {
+    ((LongColumnVector) batch.cols[0]).vector[batch.size] = i;
+    ((LongColumnVector) batch.cols[1]).vector[batch.size] = l;
+    if (str == null) {
+      batch.cols[2].noNulls = false;
+      batch.cols[2].isNull[batch.size] = true;
+    } else {
+      ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
+          str.getBytes());
+    }
+    batch.size += 1;
+  }
+
+  static TypeDescription getAllTypesType() {
+    return TypeDescription.createStruct()
+        .addField("b", TypeDescription.createBoolean())
+        .addField("bt", TypeDescription.createByte())
+        .addField("s", TypeDescription.createShort())
+        .addField("i", TypeDescription.createInt())
+        .addField("l", TypeDescription.createLong())
+        .addField("f", TypeDescription.createFloat())
+        .addField("d", TypeDescription.createDouble())
+        .addField("de", TypeDescription.createDecimal())
+        .addField("t", TypeDescription.createTimestamp())
+        .addField("dt", TypeDescription.createDate())
+        .addField("str", TypeDescription.createString())
+        .addField("c", TypeDescription.createChar().withMaxLength(5))
+        .addField("vc", TypeDescription.createVarchar().withMaxLength(10))
+        .addField("m", TypeDescription.createMap(
+            TypeDescription.createString(),
+            TypeDescription.createString()))
+        .addField("a", TypeDescription.createList(TypeDescription.createInt()))
+        .addField("st", TypeDescription.createStruct()
+                .addField("i", TypeDescription.createInt())
+                .addField("s", TypeDescription.createString()));
+  }
+
+  static void appendAllTypes(VectorizedRowBatch batch,
+                             boolean b,
+                             byte bt,
+                             short s,
+                             int i,
+                             long l,
+                             float f,
+                             double d,
+                             HiveDecimalWritable de,
+                             Timestamp t,
+                             DateWritable dt,
+                             String str,
+                             String c,
+                             String vc,
+                             Map<String, String> m,
+                             List<Integer> a,
+                             int sti,
+                             String sts) {
+    int row = batch.size++;
+    ((LongColumnVector) batch.cols[0]).vector[row] = b ? 1 : 0;
+    ((LongColumnVector) batch.cols[1]).vector[row] = bt;
+    ((LongColumnVector) batch.cols[2]).vector[row] = s;
+    ((LongColumnVector) batch.cols[3]).vector[row] = i;
+    ((LongColumnVector) batch.cols[4]).vector[row] = l;
+    ((DoubleColumnVector) batch.cols[5]).vector[row] = f;
+    ((DoubleColumnVector) batch.cols[6]).vector[row] = d;
+    ((DecimalColumnVector) batch.cols[7]).vector[row].set(de);
+    ((TimestampColumnVector) batch.cols[8]).set(row, t);
+    ((LongColumnVector) batch.cols[9]).vector[row] = dt.getDays();
+    ((BytesColumnVector) batch.cols[10]).setVal(row, str.getBytes());
+    ((BytesColumnVector) batch.cols[11]).setVal(row, c.getBytes());
+    ((BytesColumnVector) batch.cols[12]).setVal(row, vc.getBytes());
+    MapColumnVector map = (MapColumnVector) batch.cols[13];
+    int offset = map.childCount;
+    map.offsets[row] = offset;
+    map.lengths[row] = m.size();
+    map.childCount += map.lengths[row];
+    for(Map.Entry<String, String> entry: m.entrySet()) {
+      ((BytesColumnVector) map.keys).setVal(offset, entry.getKey().getBytes());
+      ((BytesColumnVector) map.values).setVal(offset++,
+          entry.getValue().getBytes());
+    }
+    ListColumnVector list = (ListColumnVector) batch.cols[14];
+    offset = list.childCount;
+    list.offsets[row] = offset;
+    list.lengths[row] = a.size();
+    list.childCount += list.lengths[row];
+    for(int e=0; e < a.size(); ++e) {
+      ((LongColumnVector) list.child).vector[offset + e] = a.get(e);
+    }
+    StructColumnVector struct = (StructColumnVector) batch.cols[15];
+    ((LongColumnVector) struct.fields[0]).vector[row] = sti;
+    ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes());
+  }
+
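+  // appendAllTypes above relies on the composite-vector contract: for maps
+  // and lists, offsets[row] points into the shared child vector(s),
+  // lengths[row] is the element count, and childCount is the next free child
+  // slot. A minimal sketch of that bookkeeping for a list of ints (an
+  // illustrative helper, not used by the original tests; assumes list.child
+  // has enough capacity):
+  static void appendIntList(ListColumnVector list, int row, int... values) {
+    list.offsets[row] = list.childCount;  // this row's elements start here
+    list.lengths[row] = values.length;    // element count for this row
+    for (int v : values) {
+      ((LongColumnVector) list.child).vector[list.childCount++] = v;
+    }
+  }
+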
+  public static void checkOutput(String expected,
+                                 String actual) throws Exception {
+    BufferedReader eStream =
+        new BufferedReader(new FileReader
+            (TestJsonFileDump.getFileFromClasspath(expected)));
+    BufferedReader aStream =
+        new BufferedReader(new FileReader(actual));
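+    // Compare line by line against the golden file from the classpath; trim
+    // to tolerate trailing whitespace, and let a short actual file fail the
+    // assertion rather than throw a NullPointerException.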
+    String expectedLine = eStream.readLine().trim();
+    while (expectedLine != null) {
+      String actualLine = aStream.readLine();
+      System.out.println("actual:   " + actualLine);
+      System.out.println("expected: " + expectedLine);
+      Assert.assertEquals(expectedLine, actualLine == null ? null : actualLine.trim());
+      expectedLine = eStream.readLine();
+      expectedLine = expectedLine == null ? null : expectedLine.trim();
+    }
+    Assert.assertNull(eStream.readLine());
+    Assert.assertNull(aStream.readLine());
+    eStream.close();
+    aStream.close();
+  }
+
+  @Test
+  public void testDump() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .setSchema(schema)
+            .compress(CompressionKind.ZLIB)
+            .stripeSize(100000)
+            .rowIndexStride(1000));
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    for(int i=0; i < 21000; ++i) {
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
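+
+  // The stdout-capture pattern in testDump repeats in each dump test below.
+  // A minimal sketch of the same steps as a reusable helper (an illustrative
+  // helper name; the original tests inline these lines):
+  static void runDumpToFile(String outFile, String... args) throws Exception {
+    PrintStream origOut = System.out;
+    FileOutputStream fileOut = new FileOutputStream(outFile);
+    try {
+      System.setOut(new PrintStream(fileOut));
+      FileDump.main(args);           // FileDump prints to System.out
+      System.out.flush();
+    } finally {
+      System.setOut(origOut);        // always restore the real stdout
+      fileOut.close();
+    }
+  }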
+
+  @Test
+  public void testDataDump() throws Exception {
+    TypeDescription schema = getAllTypesType();
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.NONE)
+            .bufferSize(10000)
+            .rowIndexStride(1000));
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    Map<String, String> m = new HashMap<String, String>(2);
+    m.put("k1", "v1");
+    appendAllTypes(batch,
+        true,
+        (byte) 10,
+        (short) 100,
+        1000,
+        10000L,
+        4.0f,
+        20.0,
+        new HiveDecimalWritable("4.2222"),
+        new Timestamp(1416967764000L),
+        new DateWritable(new Date(1416967764000L)),
+        "string",
+        "hello",
+        "hello",
+        m,
+        Arrays.asList(100, 200),
+        10, "foo");
+    m.clear();
+    m.put("k3", "v3");
+    appendAllTypes(batch,
+        false,
+        (byte)20,
+        (short)200,
+        2000,
+        20000L,
+        8.0f,
+        40.0,
+        new HiveDecimalWritable("2.2222"),
+        new Timestamp(1416967364000L),
+        new DateWritable(new Date(1411967764000L)),
+        "abcd",
+        "world",
+        "world",
+        m,
+        Arrays.asList(200, 300),
+        20, "bar");
+    writer.addRowBatch(batch);
+
+    writer.close();
+    PrintStream origOut = System.out;
+    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
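+    // -d dumps row data (one JSON record per line) rather than file metadata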
+    FileDump.main(new String[]{testFilePath.toString(), "-d"});
+    System.out.flush();
+    System.setOut(origOut);
+    String[] lines = myOut.toString().split("\n");
+    Assert.assertEquals("{\"b\":true,\"bt\":10,\"s\":100,\"i\":1000,\"l\":10000,\"f\":4,\"d\":20,\"de\":\"4.2222\",\"t\":\"2014-11-25 18:09:24.0\",\"dt\":\"2014-11-25\",\"str\":\"string\",\"c\":\"hello\",\"vc\":\"hello\",\"m\":[{\"_key\":\"k1\",\"_value\":\"v1\"}],\"a\":[100,200],\"st\":{\"i\":10,\"s\":\"foo\"}}", lines[0]);
+    Assert.assertEquals("{\"b\":false,\"bt\":20,\"s\":200,\"i\":2000,\"l\":20000,\"f\":8,\"d\":40,\"de\":\"2.2222\",\"t\":\"2014-11-25 18:02:44.0\",\"dt\":\"2014-09-28\",\"str\":\"abcd\",\"c\":\"world\",\"vc\":\"world\",\"m\":[{\"_key\":\"k3\",\"_value\":\"v3\"}],\"a\":[200,300],\"st\":{\"i\":20,\"s\":\"bar\"}}", lines[1]);
+  }
+
+  // Test that if the fraction of rows with distinct strings is greater than
+  // the configured threshold, dictionary encoding is turned off. When
+  // dictionary encoding is turned off, the length of the dictionary stream
+  // for the column will be 0 in the ORC file dump.
+  @Test
+  public void testDictionaryThreshold() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    Configuration conf = new Configuration();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    conf.setFloat(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), 0.49f);
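+    // The loop below makes exactly half of the rows distinct, so the 0.5
+    // distinct fraction exceeds the 0.49 threshold and dictionary encoding
+    // should be disabled for the string column.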
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .fileSystem(fs)
+            .setSchema(schema)
+            .stripeSize(100000)
+            .compress(CompressionKind.ZLIB)
+            .rowIndexStride(1000)
+            .bufferSize(10000));
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    int nextInt = 0;
+    for(int i=0; i < 21000; ++i) {
+      // Write out the same string twice; this guarantees that the fraction
+      // of rows with distinct strings is 0.5.
+      if (i % 2 == 0) {
+        nextInt = r1.nextInt(words.length);
+        // Append the value of i to the word; this guarantees that when an
+        // index or word repeats, the actual string is unique.
+        words[nextInt] += "-" + i;
+      }
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(), words[nextInt]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size != 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-dictionary-threshold.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=1,2,3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+
+  @Test
+  public void testBloomFilter() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .setSchema(schema)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("S");
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    for(int i=0; i < 21000; ++i) {
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-bloomfilter.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+
+  @Test
+  public void testBloomFilter2() throws Exception {
+    TypeDescription schema = getMyRecordType();
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .setSchema(schema)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("l")
+        .bloomFilterFpp(0.01);
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    for(int i=0; i < 21000; ++i) {
+      appendMyRecord(batch, r1.nextInt(), r1.nextLong(),
+          words[r1.nextInt(words.length)]);
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump-bloomfilter2.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString(), "--rowindex=2"});
+    System.out.flush();
+    System.setOut(origOut);
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/df8921d8/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
----------------------------------------------------------------------
diff --git a/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java b/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
new file mode 100644
index 0000000..efded7a
--- /dev/null
+++ b/orc/src/test/org/apache/hive/orc/tools/TestJsonFileDump.java
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.orc.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.PrintStream;
+import java.net.URL;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hive.orc.OrcFile;
+import org.apache.hive.orc.CompressionKind;
+import org.apache.hive.orc.OrcConf;
+import org.apache.hive.orc.TypeDescription;
+import org.apache.hive.orc.Writer;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestJsonFileDump {
+  public static String getFileFromClasspath(String name) {
+    URL url = ClassLoader.getSystemResource(name);
+    if (url == null) {
+      throw new IllegalArgumentException("Could not find " + name);
+    }
+    return url.getPath();
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir"));
+  Configuration conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new Configuration();
+    fs = FileSystem.getLocal(conf);
+    fs.setWorkingDirectory(workDir);
+    testFilePath = new Path("TestFileDump.testDump.orc");
+    fs.delete(testFilePath, false);
+  }
+
+  static void checkOutput(String expected,
+                          String actual) throws Exception {
+    BufferedReader eStream =
+        new BufferedReader(new FileReader(getFileFromClasspath(expected)));
+    BufferedReader aStream =
+        new BufferedReader(new FileReader(actual));
+    String expectedLine = eStream.readLine();
+    while (expectedLine != null) {
+      String actualLine = aStream.readLine();
+      System.out.println("actual:   " + actualLine);
+      System.out.println("expected: " + expectedLine);
+      assertEquals(expectedLine, actualLine);
+      expectedLine = eStream.readLine();
+    }
+    assertNull(eStream.readLine());
+    assertNull(aStream.readLine());
+  }
+
+  @Test
+  public void testJsonDump() throws Exception {
+    TypeDescription schema = TypeDescription.createStruct()
+        .addField("i", TypeDescription.createInt())
+        .addField("l", TypeDescription.createLong())
+        .addField("s", TypeDescription.createString());
+    conf.set(OrcConf.ENCODING_STRATEGY.getAttribute(), "COMPRESSION");
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf)
+        .fileSystem(fs)
+        .setSchema(schema)
+        .stripeSize(100000)
+        .compress(CompressionKind.ZLIB)
+        .bufferSize(10000)
+        .rowIndexStride(1000)
+        .bloomFilterColumns("s");
+    Writer writer = OrcFile.createWriter(testFilePath, options);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    VectorizedRowBatch batch = schema.createRowBatch(1000);
+    for(int i=0; i < 21000; ++i) {
+      ((LongColumnVector) batch.cols[0]).vector[batch.size] = r1.nextInt();
+      ((LongColumnVector) batch.cols[1]).vector[batch.size] = r1.nextLong();
+      if (i % 100 == 0) {
+        batch.cols[2].noNulls = false;
+        batch.cols[2].isNull[batch.size] = true;
+      } else {
+        ((BytesColumnVector) batch.cols[2]).setVal(batch.size,
+            words[r1.nextInt(words.length)].getBytes());
+      }
+      batch.size += 1;
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size > 0) {
+      writer.addRowBatch(batch);
+    }
+
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = "orc-file-dump.json";
+    FileOutputStream myOut = new FileOutputStream(workDir + File.separator + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
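+    // -j emits JSON, -p pretty-prints it, and --rowindex=3 includes the
+    // row-index (and bloom filter) details for column 3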
+    FileDump.main(new String[]{testFilePath.toString(), "-j", "-p", "--rowindex=3"});
+    System.out.flush();
+    System.setOut(origOut);
+
+    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+  }
+}

