parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [40/51] [partial] parquet-mr git commit: PARQUET-23: Rename to org.apache.parquet.
Date Mon, 27 Apr 2015 23:12:37 GMT
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java
new file mode 100644
index 0000000..bd938ee
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java
@@ -0,0 +1,135 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.plain;
+
+import static org.apache.parquet.Log.DEBUG;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.bytes.LittleEndianDataInputStream;
+import org.apache.parquet.column.values.ValuesReader;
+import org.apache.parquet.io.ParquetDecodingException;
+
+/**
+ * Plain encoding for float, double, int, long
+ *
+ * @author Julien Le Dem
+ *
+ */
+abstract public class PlainValuesReader extends ValuesReader {
+  private static final Log LOG = Log.getLog(PlainValuesReader.class);
+
+  protected LittleEndianDataInputStream in;
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.values.ValuesReader#initFromPage(int, byte[], int)
+   */
+  @Override
+  public void initFromPage(int valueCount, byte[] in, int offset) throws IOException {
+    if (DEBUG) LOG.debug("init from page at offset "+ offset + " for length " + (in.length - offset));
+    this.in = new LittleEndianDataInputStream(new ByteArrayInputStream(in, offset, in.length - offset));
+  }
+
+  public static class DoublePlainValuesReader extends PlainValuesReader {
+
+    @Override
+    public void skip() {
+      try {
+        in.skipBytes(8);
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not skip double", e);
+      }
+    }
+
+    @Override
+    public double readDouble() {
+      try {
+        return in.readDouble();
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not read double", e);
+      }
+    }
+  }
+
+  public static class FloatPlainValuesReader extends PlainValuesReader {
+
+    @Override
+    public void skip() {
+      try {
+        in.skipBytes(4);
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not skip float", e);
+      }
+    }
+
+    @Override
+    public float readFloat() {
+      try {
+        return in.readFloat();
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not read float", e);
+      }
+    }
+  }
+
+  public static class IntegerPlainValuesReader extends PlainValuesReader {
+
+    @Override
+    public void skip() {
+      try {
+        in.skipBytes(4);
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not skip int", e);
+      }
+    }
+
+    @Override
+    public int readInteger() {
+      try {
+        return in.readInt();
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not read int", e);
+      }
+    }
+  }
+
+  public static class LongPlainValuesReader extends PlainValuesReader {
+
+    @Override
+    public void skip() {
+      try {
+        in.skipBytes(8);
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not skip long", e);
+      }
+    }
+
+    @Override
+    public long readLong() {
+      try {
+        return in.readLong();
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not read long", e);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java
new file mode 100644
index 0000000..f33bd81
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesWriter.java
@@ -0,0 +1,143 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.plain;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
+import org.apache.parquet.bytes.LittleEndianDataOutputStream;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.io.ParquetEncodingException;
+import org.apache.parquet.io.api.Binary;
+
+/**
+ * Plain encoding except for booleans
+ *
+ * @author Julien Le Dem
+ *
+ */
+public class PlainValuesWriter extends ValuesWriter {
+  private static final Log LOG = Log.getLog(PlainValuesWriter.class);
+
+  public static final Charset CHARSET = Charset.forName("UTF-8");
+
+  private CapacityByteArrayOutputStream arrayOut;
+  private LittleEndianDataOutputStream out;
+
+  public PlainValuesWriter(int initialSize, int pageSize) {
+    arrayOut = new CapacityByteArrayOutputStream(initialSize, pageSize);
+    out = new LittleEndianDataOutputStream(arrayOut);
+  }
+
+  @Override
+  public final void writeBytes(Binary v) {
+    try {
+      out.writeInt(v.length());
+      v.writeTo(out);
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write bytes", e);
+    }
+  }
+
+  @Override
+  public final void writeInteger(int v) {
+    try {
+      out.writeInt(v);
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write int", e);
+    }
+  }
+
+  @Override
+  public final void writeLong(long v) {
+    try {
+      out.writeLong(v);
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write long", e);
+    }
+  }
+
+  @Override
+  public final void writeFloat(float v) {
+    try {
+      out.writeFloat(v);
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write float", e);
+    }
+  }
+
+  @Override
+  public final void writeDouble(double v) {
+    try {
+      out.writeDouble(v);
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write double", e);
+    }
+  }
+
+  @Override
+  public void writeByte(int value) {
+    try {
+      out.write(value);
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write byte", e);
+    }
+  }
+
+  @Override
+  public long getBufferedSize() {
+    return arrayOut.size();
+  }
+
+  @Override
+  public BytesInput getBytes() {
+    try {
+      out.flush();
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write page", e);
+    }
+    if (Log.DEBUG) LOG.debug("writing a buffer of size " + arrayOut.size());
+    return BytesInput.from(arrayOut);
+  }
+
+  @Override
+  public void reset() {
+    arrayOut.reset();
+  }
+
+  @Override
+  public long getAllocatedSize() {
+    return arrayOut.getCapacity();
+  }
+
+  @Override
+  public Encoding getEncoding() {
+    return Encoding.PLAIN;
+  }
+
+  @Override
+  public String memUsageString(String prefix) {
+    return arrayOut.memUsageString(prefix + " PLAIN");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java
new file mode 100644
index 0000000..38eb354
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridDecoder.java
@@ -0,0 +1,109 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.rle;
+
+import static org.apache.parquet.Log.DEBUG;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.values.bitpacking.BytePacker;
+import org.apache.parquet.column.values.bitpacking.Packer;
+import org.apache.parquet.io.ParquetDecodingException;
+
+/**
+ * Decodes values written in the grammar described in {@link RunLengthBitPackingHybridEncoder}
+ *
+ * @author Julien Le Dem
+ */
+public class RunLengthBitPackingHybridDecoder {
+  private static final Log LOG = Log.getLog(RunLengthBitPackingHybridDecoder.class);
+
+  private static enum MODE { RLE, PACKED }
+
+  private final int bitWidth;
+  private final BytePacker packer;
+  private final ByteArrayInputStream in;
+
+  private MODE mode;
+  private int currentCount;
+  private int currentValue;
+  private int[] currentBuffer;
+
+  public RunLengthBitPackingHybridDecoder(int bitWidth, ByteArrayInputStream in) {
+    if (DEBUG) LOG.debug("decoding bitWidth " + bitWidth);
+
+    Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
+    this.bitWidth = bitWidth;
+    this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
+    this.in = in;
+  }
+
+  public int readInt() throws IOException {
+    if (currentCount == 0) {
+      readNext();
+    }
+    -- currentCount;
+    int result;
+    switch (mode) {
+    case RLE:
+      result = currentValue;
+      break;
+    case PACKED:
+      result = currentBuffer[currentBuffer.length - 1 - currentCount];
+      break;
+    default:
+      throw new ParquetDecodingException("not a valid mode " + mode);
+    }
+    return result;
+  }
+
+  private void readNext() throws IOException {
+    Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream.");
+    final int header = BytesUtils.readUnsignedVarInt(in);
+    mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED;
+    switch (mode) {
+    case RLE:
+      currentCount = header >>> 1;
+      if (DEBUG) LOG.debug("reading " + currentCount + " values RLE");
+      currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth);
+      break;
+    case PACKED:
+      int numGroups = header >>> 1;
+      currentCount = numGroups * 8;
+      if (DEBUG) LOG.debug("reading " + currentCount + " values BIT PACKED");
+      currentBuffer = new int[currentCount]; // TODO: reuse a buffer
+      byte[] bytes = new byte[numGroups * bitWidth];
+      // At the end of the RLE data in the file, though, there might not be that many bytes left.
+      int bytesToRead = (int)Math.ceil(currentCount * bitWidth / 8.0);
+      bytesToRead = Math.min(bytesToRead, in.available());
+      new DataInputStream(in).readFully(bytes, 0, bytesToRead);
+      for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) {
+        packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex);
+      }
+      break;
+    default:
+      throw new ParquetDecodingException("not a valid mode " + mode);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
new file mode 100644
index 0000000..9d37574
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridEncoder.java
@@ -0,0 +1,291 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.rle;
+
+import java.io.IOException;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
+import org.apache.parquet.column.values.bitpacking.BytePacker;
+import org.apache.parquet.column.values.bitpacking.Packer;
+
+import static org.apache.parquet.Log.DEBUG;
+
+/**
+ * Encodes values using a combination of run length encoding and bit packing,
+ * according to the following grammar:
+ *
+ * <pre>
+ * {@code
+ * rle-bit-packed-hybrid: <length> <encoded-data>
+ * length := length of the <encoded-data> in bytes stored as 4 bytes little endian
+ * encoded-data := <run>*
+ * run := <bit-packed-run> | <rle-run>
+ * bit-packed-run := <bit-packed-header> <bit-packed-values>
+ * bit-packed-header := varint-encode(<bit-pack-count> << 1 | 1)
+ * // we always bit-pack a multiple of 8 values at a time, so we only store the number of values / 8
+ * bit-pack-count := (number of values in this run) / 8
+ * bit-packed-values :=  bit packed back to back, from LSB to MSB
+ * rle-run := <rle-header> <repeated-value>
+ * rle-header := varint-encode( (number of times repeated) << 1)
+ * repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width)
+ * }
+ * </pre>
+ * NOTE: this class is only responsible for creating and returning the {@code <encoded-data>}
+ *       portion of the above grammar. The {@code <length>} portion is done by
+ *       {@link RunLengthBitPackingHybridValuesWriter}
+ * <p>
+ * Only supports values >= 0 // TODO: is that ok? Should we make a signed version?
+ *
+ * @author Alex Levenson
+ */
+public class RunLengthBitPackingHybridEncoder {
+  private static final Log LOG = Log.getLog(RunLengthBitPackingHybridEncoder.class);
+
+  private final BytePacker packer;
+
+  private final CapacityByteArrayOutputStream baos;
+
+  /**
+   * The bit width used for bit-packing and for writing
+   * the repeated-value
+   */
+  private final int bitWidth;
+
+  /**
+   * Values that are bit packed 8 at a time are packed into this
+   * buffer, which is then written to baos
+   */
+  private final byte[] packBuffer;
+
+  /**
+   * Previous value written, used to detect repeated values
+   */
+  private int previousValue;
+
+  /**
+   * We buffer 8 values at a time, and either bit pack them
+   * or discard them after writing a rle-run
+   */
+  private final int[] bufferedValues;
+  private int numBufferedValues;
+
+  /**
+   * How many times a value has been repeated
+   */
+  private int repeatCount;
+
+  /**
+   * How many groups of 8 values have been written
+   * to the current bit-packed-run
+   */
+  private int bitPackedGroupCount;
+
+  /**
+   * A "pointer" to a single byte in baos,
+   * which we use as our bit-packed-header. It's really
+   * the logical index of the byte in baos.
+   *
+   * We are only using one byte for this header,
+   * which limits us to writing 504 values per bit-packed-run.
+   *
+   * MSB must be 0 for varint encoding and LSB must be 1 to signify
+   * that this is a bit-packed-header, which leaves 6 bits to write
+   * the number of 8-groups -> (2^6 - 1) * 8 = 504
+   */
+  private long bitPackedRunHeaderPointer;
+
+  private boolean toBytesCalled;
+
+  public RunLengthBitPackingHybridEncoder(int bitWidth, int initialCapacity, int pageSize) {
+    if (DEBUG) {
+      LOG.debug(String.format("Encoding: RunLengthBitPackingHybridEncoder with "
+        + "bithWidth: %d initialCapacity %d", bitWidth, initialCapacity));
+    }
+
+    Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
+
+    this.bitWidth = bitWidth;
+    this.baos = new CapacityByteArrayOutputStream(initialCapacity, pageSize);
+    this.packBuffer = new byte[bitWidth];
+    this.bufferedValues = new int[8];
+    this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
+    reset(false);
+  }
+
+  private void reset(boolean resetBaos) {
+    if (resetBaos) {
+      this.baos.reset();
+    }
+    this.previousValue = 0;
+    this.numBufferedValues = 0;
+    this.repeatCount = 0;
+    this.bitPackedGroupCount = 0;
+    this.bitPackedRunHeaderPointer = -1;
+    this.toBytesCalled = false;
+  }
+
+  public void writeInt(int value) throws IOException {
+    if (value == previousValue) {
+      // keep track of how many times we've seen this value
+      // consecutively
+      ++repeatCount;
+
+      if (repeatCount >= 8) {
+        // we've seen this at least 8 times, we're
+        // certainly going to write an rle-run,
+        // so just keep on counting repeats for now
+        return;
+      }
+    } else {
+      // This is a new value, check if it signals the end of
+      // an rle-run
+      if (repeatCount >= 8) {
+        // it does! write an rle-run
+        writeRleRun();
+      }
+
+      // this is a new value so we've only seen it once
+      repeatCount = 1;
+      // start tracking this value for repeats
+      previousValue = value;
+    }
+
+    // We have not seen enough repeats to justify an rle-run yet,
+    // so buffer this value in case we decide to write a bit-packed-run
+    bufferedValues[numBufferedValues] = value;
+    ++numBufferedValues;
+
+    if (numBufferedValues == 8) {
+      // we've encountered fewer than 8 repeated values, so
+      // either start a new bit-packed-run or append to the
+      // current bit-packed-run
+      writeOrAppendBitPackedRun();
+    }
+  }
+
+  private void writeOrAppendBitPackedRun() throws IOException {
+    if (bitPackedGroupCount >= 63) {
+      // we've packed as many values as we can for this run,
+      // end it and start a new one
+      endPreviousBitPackedRun();
+    }
+
+    if (bitPackedRunHeaderPointer == -1) {
+      // this is a new bit-packed-run, allocate a byte for the header
+      // and keep a "pointer" to it so that it can be mutated later
+      baos.write(0); // write a sentinel value
+      bitPackedRunHeaderPointer = baos.getCurrentIndex();
+    }
+
+    packer.pack8Values(bufferedValues, 0, packBuffer, 0);
+    baos.write(packBuffer);
+
+    // empty the buffer, they've all been written
+    numBufferedValues = 0;
+
+    // clear the repeat count, as some repeated values
+    // may have just been bit packed into this run
+    repeatCount = 0;
+
+    ++bitPackedGroupCount;
+  }
+
+  /**
+   * If we are currently writing a bit-packed-run, update the
+   * bit-packed-header and consider this run to be over
+   *
+   * does nothing if we're not currently writing a bit-packed run
+   */
+  private void endPreviousBitPackedRun() {
+    if (bitPackedRunHeaderPointer == -1) {
+      // we're not currently in a bit-packed-run
+      return;
+    }
+
+    // create bit-packed-header, which needs to fit in 1 byte
+    byte bitPackHeader = (byte) ((bitPackedGroupCount << 1) | 1);
+
+    // update this byte
+    baos.setByte(bitPackedRunHeaderPointer, bitPackHeader);
+
+    // mark that this run is over
+    bitPackedRunHeaderPointer = -1;
+
+    // reset the number of groups
+    bitPackedGroupCount = 0;
+  }
+
+  private void writeRleRun() throws IOException {
+    // we may have been working on a bit-packed-run
+    // so close that run if it exists before writing this
+    // rle-run
+    endPreviousBitPackedRun();
+
+    // write the rle-header (lsb of 0 signifies a rle run)
+    BytesUtils.writeUnsignedVarInt(repeatCount << 1, baos);
+    // write the repeated-value
+    BytesUtils.writeIntLittleEndianPaddedOnBitWidth(baos, previousValue, bitWidth);
+
+    // reset the repeat count
+    repeatCount = 0;
+
+    // throw away all the buffered values, they were just repeats and they've been written
+    numBufferedValues = 0;
+  }
+
+  public BytesInput toBytes() throws IOException {
+    Preconditions.checkArgument(!toBytesCalled,
+        "You cannot call toBytes() more than once without calling reset()");
+
+    // write anything that is buffered / queued up for an rle-run
+    if (repeatCount >= 8) {
+      writeRleRun();
+    } else if(numBufferedValues > 0) {
+      for (int i = numBufferedValues; i < 8; i++) {
+        bufferedValues[i] = 0;
+      }
+      writeOrAppendBitPackedRun();
+      endPreviousBitPackedRun();
+    } else {
+      endPreviousBitPackedRun();
+    }
+
+    toBytesCalled = true;
+    return BytesInput.from(baos);
+  }
+
+  /**
+   * Reset this encoder for re-use
+   */
+  public void reset() {
+    reset(true);
+  }
+
+  public long getBufferedSize() {
+    return baos.size();
+  }
+
+  public long getAllocatedSize() {
+    return baos.getCapacity();
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java
new file mode 100644
index 0000000..bd4e11d
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesReader.java
@@ -0,0 +1,77 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.rle;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.values.ValuesReader;
+import org.apache.parquet.io.ParquetDecodingException;
+
+/**
+ * This ValuesReader wraps the page bytes in a {@link RunLengthBitPackingHybridDecoder}
+ * in {@link #initFromPage}; values are then decoded on demand by {@link #readInteger}.
+ *
+ * @author Alex Levenson
+ */
+public class RunLengthBitPackingHybridValuesReader extends ValuesReader {
+  private final int bitWidth;
+  private RunLengthBitPackingHybridDecoder decoder;
+  private int nextOffset;
+
+  public RunLengthBitPackingHybridValuesReader(int bitWidth) {
+    this.bitWidth = bitWidth;
+  }
+
+  @Override
+  public void initFromPage(int valueCountL, byte[] page, int offset) throws IOException {
+    ByteArrayInputStream in = new ByteArrayInputStream(page, offset, page.length - offset);
+    int length = BytesUtils.readIntLittleEndian(in);
+
+    decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in);
+
+    // 4 is for the length which is stored as 4 bytes little endian
+    this.nextOffset = offset + length + 4;
+  }
+  
+  @Override
+  public int getNextOffset() {
+    return this.nextOffset;
+  }
+
+  @Override
+  public int readInteger() {
+    try {
+      return decoder.readInt();
+    } catch (IOException e) {
+      throw new ParquetDecodingException(e);
+    }
+  }
+  
+  @Override
+  public boolean readBoolean() {
+    return readInteger() == 0 ? false : true;
+  }
+
+  @Override
+  public void skip() {
+    readInteger();
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
new file mode 100644
index 0000000..bccfd34
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
@@ -0,0 +1,88 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.rle;
+
+import java.io.IOException;
+
+import org.apache.parquet.Ints;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.io.ParquetEncodingException;
+
+/**
+ * @author Alex Levenson
+ */
+public class RunLengthBitPackingHybridValuesWriter extends ValuesWriter {
+  private final RunLengthBitPackingHybridEncoder encoder;
+
+  public RunLengthBitPackingHybridValuesWriter(int bitWidth, int initialCapacity, int pageSize) {
+    this.encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialCapacity, pageSize);
+  }
+
+  @Override
+  public void writeInteger(int v) {
+    try {
+      encoder.writeInt(v);
+    } catch (IOException e) {
+      throw new ParquetEncodingException(e);
+    }
+  }
+
+  @Override
+  public void writeBoolean(boolean v) {
+    writeInteger(v ? 1 : 0);
+  }
+
+  @Override
+  public long getBufferedSize() {
+    return encoder.getBufferedSize();
+  }
+
+  @Override
+  public long getAllocatedSize() {
+    return encoder.getAllocatedSize();
+  }
+
+  @Override
+  public BytesInput getBytes() {
+    try {
+      // prepend the length of the column
+      BytesInput rle = encoder.toBytes();
+      return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle);
+    } catch (IOException e) {
+      throw new ParquetEncodingException(e);
+    }
+  }
+
+  @Override
+  public Encoding getEncoding() {
+    return Encoding.RLE;
+  }
+
+  @Override
+  public void reset() {
+    encoder.reset();
+  }
+
+  @Override
+  public String memUsageString(String prefix) {
+    return String.format("%s RunLengthBitPackingHybrid %d bytes", prefix, getAllocatedSize());
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/DummyRecordConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/DummyRecordConverter.java b/parquet-column/src/main/java/org/apache/parquet/example/DummyRecordConverter.java
new file mode 100644
index 0000000..c9c3589
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/DummyRecordConverter.java
@@ -0,0 +1,114 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example;
+
+import java.util.List;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.Converter;
+import org.apache.parquet.io.api.GroupConverter;
+import org.apache.parquet.io.api.PrimitiveConverter;
+import org.apache.parquet.io.api.RecordMaterializer;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.TypeConverter;
+
+/**
+ * Dummy implementation for perf tests
+ *
+ * @author Julien Le Dem
+ *
+ */
+public final class DummyRecordConverter extends RecordMaterializer<Object> {
+
+  // Last value or event received by any converter; returned by getCurrentRecord().
+  private Object a;
+  // Converter tree, built once from the schema in the constructor.
+  private final GroupConverter root;
+
+  /**
+   * Builds one converter per schema node. Every converter only stores what it
+   * receives in {@code a}, overwriting whatever was stored before.
+   *
+   * @param schema the schema the converter tree is built for
+   */
+  public DummyRecordConverter(MessageType schema) {
+    this.root = (GroupConverter)schema.convertWith(new TypeConverter<Converter>() {
+
+      @Override
+      public Converter convertPrimitiveType(List<GroupType> path, PrimitiveType primitiveType) {
+        // Each primitive callback keeps the (boxed) value it was given.
+        return new PrimitiveConverter() {
+
+          @Override
+          public void addBinary(Binary value) {
+            a = value;
+          }
+          @Override
+          public void addBoolean(boolean value) {
+            a = value;
+          }
+          @Override
+          public void addDouble(double value) {
+            a = value;
+          }
+          @Override
+          public void addFloat(float value) {
+            a = value;
+          }
+          @Override
+          public void addInt(int value) {
+            a = value;
+          }
+          @Override
+          public void addLong(long value) {
+            a = value;
+          }
+        };
+      }
+
+      @Override
+      public Converter convertGroupType(List<GroupType> path, GroupType groupType, final List<Converter> converters) {
+        // Group events are recorded as the marker strings "start()" and "end()".
+        return new GroupConverter() {
+
+          @Override
+          public Converter getConverter(int fieldIndex) {
+            return converters.get(fieldIndex);
+          }
+
+          @Override
+          public void start() {
+            a = "start()";
+          }
+
+          @Override
+          public void end() {
+            a = "end()";
+          }
+
+        };
+      }
+
+      @Override
+      public Converter convertMessageType(MessageType messageType, List<Converter> children) {
+        // The message is handled like any other group, with no parent path.
+        return convertGroupType(null, messageType, children);
+      }
+    });
+  }
+
+  /** @return the last value or event marker stored by one of the converters */
+  @Override
+  public Object getCurrentRecord() {
+    return a;
+  }
+
+  /** @return the converter created for the message type in the constructor */
+  @Override
+  public GroupConverter getRootConverter() {
+    return root;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/Paper.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/Paper.java b/parquet-column/src/main/java/org/apache/parquet/example/Paper.java
new file mode 100644
index 0000000..a55cdbc
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/Paper.java
@@ -0,0 +1,167 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example;
+
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
+import static org.apache.parquet.schema.Type.Repetition.REPEATED;
+import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.simple.SimpleGroup;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+
+/**
+ * Examples from the Dremel Paper
+ *
+ * @author Julien Le Dem
+ *
+ */
+public class Paper {
+  // Full Document schema.
+  public static final MessageType schema =
+      new MessageType("Document",
+          new PrimitiveType(REQUIRED, INT64, "DocId"),
+          new GroupType(OPTIONAL, "Links",
+              new PrimitiveType(REPEATED, INT64, "Backward"),
+              new PrimitiveType(REPEATED, INT64, "Forward")
+              ),
+          new GroupType(REPEATED, "Name",
+              new GroupType(REPEATED, "Language",
+                  new PrimitiveType(REQUIRED, BINARY, "Code"),
+                  new PrimitiveType(OPTIONAL, BINARY, "Country")),
+              new PrimitiveType(OPTIONAL, BINARY, "Url")));
+
+  // Subset of schema: DocId and Name.Language.Country only.
+  public static final MessageType schema2 =
+      new MessageType("Document",
+          new PrimitiveType(REQUIRED, INT64, "DocId"),
+          new GroupType(REPEATED, "Name",
+              new GroupType(REPEATED, "Language",
+                  new PrimitiveType(OPTIONAL, BINARY, "Country"))));
+
+  // Subset of schema: DocId and Links only.
+  public static final MessageType schema3 =
+      new MessageType("Document",
+          new PrimitiveType(REQUIRED, INT64, "DocId"),
+          new GroupType(OPTIONAL, "Links",
+              new PrimitiveType(REPEATED, INT64, "Backward"),
+              new PrimitiveType(REPEATED, INT64, "Forward")
+              ));
+
+  // Records built against the full schema.
+  public static final SimpleGroup r1 = new SimpleGroup(schema);
+  public static final SimpleGroup r2 = new SimpleGroup(schema);
+  ////r1
+  //DocId: 10
+  //Links
+  //  Forward: 20
+  //  Forward: 40
+  //  Forward: 60
+  //Name
+  //  Language
+  //    Code: 'en-us'
+  //    Country: 'us'
+  //  Language
+  //    Code: 'en'
+  //  Url: 'http://A'
+  //Name
+  //  Url: 'http://B'
+  //Name
+  //  Language
+  //    Code: 'en-gb'
+  //    Country: 'gb'
+  static {
+    r1.add("DocId", 10L);
+    r1.addGroup("Links")
+      .append("Forward", 20L)
+      .append("Forward", 40L)
+      .append("Forward", 60L);
+    Group name = r1.addGroup("Name");
+    {
+      name.addGroup("Language")
+        .append("Code", "en-us")
+        .append("Country", "us");
+      name.addGroup("Language")
+        .append("Code", "en");
+      name.append("Url", "http://A");
+    }
+    name = r1.addGroup("Name");
+    {
+      name.append("Url", "http://B");
+    }
+    name = r1.addGroup("Name");
+    {
+      name.addGroup("Language")
+        .append("Code", "en-gb")
+        .append("Country", "gb");
+    }
+  }
+  ////r2
+  //DocId: 20
+  //Links
+  // Backward: 10
+  // Backward: 30
+  // Forward:  80
+  //Name
+  // Url: 'http://C'
+  static {
+    r2.add("DocId", 20L);
+    r2.addGroup("Links")
+      .append("Backward", 10L)
+      .append("Backward", 30L)
+      .append("Forward", 80L);
+    r2.addGroup("Name")
+      .append("Url", "http://C");
+  }
+
+  // Records built against schema2.
+  public static final SimpleGroup pr1 = new SimpleGroup(schema2);
+  public static final SimpleGroup pr2 = new SimpleGroup(schema2);
+  ////pr1
+  //DocId: 10
+  //Name
+  //  Language
+  //    Country: 'us'
+  //  Language
+  //Name
+  //Name
+  //  Language
+  //    Country: 'gb'
+  static {
+    pr1.add("DocId", 10L);
+    Group name = pr1.addGroup("Name");
+    {
+      name.addGroup("Language")
+        .append("Country", "us");
+      name.addGroup("Language");
+    }
+    name = pr1.addGroup("Name");
+    name = pr1.addGroup("Name");
+    {
+      name.addGroup("Language")
+        .append("Country", "gb");
+    }
+  }
+
+  ////pr2
+  //DocId: 20
+  //Name
+  static {
+    pr2.add("DocId", 20L);
+    pr2.addGroup("Name");
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/Group.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/Group.java b/parquet-column/src/main/java/org/apache/parquet/example/data/Group.java
new file mode 100644
index 0000000..3fb7d4d
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/Group.java
@@ -0,0 +1,143 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.example.data.simple.NanoTime;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+
+/**
+ * A group of fields that can be filled either by field index or by field name.
+ *
+ * The name-based methods look the index up with {@code getType().getFieldIndex(String)}
+ * and delegate to the index-based abstract methods. The {@code append} methods add a
+ * value by name and return this group so calls can be chained.
+ */
+public abstract class Group extends GroupValueSource {
+  private static final Log logger = Log.getLog(Group.class);
+  private static final boolean DEBUG = Log.DEBUG;
+
+  // ---- add by field name -------------------------------------------------
+
+  public void add(String field, int value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, long value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, float value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, double value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, String value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, NanoTime value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, boolean value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, Binary value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  public void add(String field, Group value) {
+    int fieldIndex = getType().getFieldIndex(field);
+    add(fieldIndex, value);
+  }
+
+  /** Adds a nested group under the named field and returns it. */
+  public Group addGroup(String field) {
+    if (DEBUG) {
+      logger.debug("add group "+field+" to "+getType().getName());
+    }
+    int fieldIndex = getType().getFieldIndex(field);
+    return addGroup(fieldIndex);
+  }
+
+  public Group getGroup(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getGroup(fieldIndex, index);
+  }
+
+  // ---- add by field index (implemented by subclasses) --------------------
+
+  public abstract void add(int fieldIndex, int value);
+
+  public abstract void add(int fieldIndex, long value);
+
+  public abstract void add(int fieldIndex, String value);
+
+  public abstract void add(int fieldIndex, boolean value);
+
+  public abstract void add(int fieldIndex, NanoTime value);
+
+  public abstract void add(int fieldIndex, Binary value);
+
+  public abstract void add(int fieldIndex, float value);
+
+  public abstract void add(int fieldIndex, double value);
+
+  public abstract void add(int fieldIndex, Group value);
+
+  public abstract Group addGroup(int fieldIndex);
+
+  public abstract Group getGroup(int fieldIndex, int index);
+
+  public Group asGroup() {
+    return this;
+  }
+
+  // ---- chainable variants: add by name, then return this -----------------
+
+  public Group append(String fieldName, int value) {
+    add(fieldName, value);
+    return this;
+  }
+
+  public Group append(String fieldName, float value) {
+    add(fieldName, value);
+    return this;
+  }
+
+  public Group append(String fieldName, double value) {
+    add(fieldName, value);
+    return this;
+  }
+
+  public Group append(String fieldName, long value) {
+    add(fieldName, value);
+    return this;
+  }
+
+  public Group append(String fieldName, NanoTime value) {
+    add(fieldName, value);
+    return this;
+  }
+
+  /** The string is converted with {@code Binary.fromString} and added as a Binary. */
+  public Group append(String fieldName, String value) {
+    Binary binary = Binary.fromString(value);
+    add(fieldName, binary);
+    return this;
+  }
+
+  public Group append(String fieldName, boolean value) {
+    add(fieldName, value);
+    return this;
+  }
+
+  public Group append(String fieldName, Binary value) {
+    add(fieldName, value);
+    return this;
+  }
+
+  public abstract void writeValue(int field, int index, RecordConsumer recordConsumer);
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/GroupFactory.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/GroupFactory.java b/parquet-column/src/main/java/org/apache/parquet/example/data/GroupFactory.java
new file mode 100644
index 0000000..4c11775
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/GroupFactory.java
@@ -0,0 +1,25 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data;
+
+/** Factory for {@link Group} instances. */
+public abstract class GroupFactory {
+
+  /** @return a group supplied by the concrete factory */
+  public abstract Group newGroup();
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/GroupValueSource.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/GroupValueSource.java b/parquet-column/src/main/java/org/apache/parquet/example/data/GroupValueSource.java
new file mode 100644
index 0000000..4c24e8b
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/GroupValueSource.java
@@ -0,0 +1,89 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.GroupType;
+
+/**
+ * Read access to the values of a group, by field index or by field name.
+ *
+ * Every name-based getter looks the index up with
+ * {@code getType().getFieldIndex(String)} and delegates to the index-based
+ * abstract getter of the same name.
+ */
+public abstract class GroupValueSource {
+
+  public int getFieldRepetitionCount(String field) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getFieldRepetitionCount(fieldIndex);
+  }
+
+  public GroupValueSource getGroup(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getGroup(fieldIndex, index);
+  }
+
+  public String getString(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getString(fieldIndex, index);
+  }
+
+  public int getInteger(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getInteger(fieldIndex, index);
+  }
+
+  public long getLong(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getLong(fieldIndex, index);
+  }
+
+  public double getDouble(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getDouble(fieldIndex, index);
+  }
+
+  public float getFloat(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getFloat(fieldIndex, index);
+  }
+
+  public boolean getBoolean(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getBoolean(fieldIndex, index);
+  }
+
+  public Binary getBinary(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getBinary(fieldIndex, index);
+  }
+
+  public Binary getInt96(String field, int index) {
+    int fieldIndex = getType().getFieldIndex(field);
+    return getInt96(fieldIndex, index);
+  }
+
+  // ---- index-based access, implemented by subclasses ---------------------
+
+  public abstract int getFieldRepetitionCount(int fieldIndex);
+
+  public abstract GroupValueSource getGroup(int fieldIndex, int index);
+
+  public abstract String getString(int fieldIndex, int index);
+
+  public abstract int getInteger(int fieldIndex, int index);
+
+  public abstract long getLong(int fieldIndex, int index);
+
+  public abstract double getDouble(int fieldIndex, int index);
+
+  public abstract float getFloat(int fieldIndex, int index);
+
+  public abstract boolean getBoolean(int fieldIndex, int index);
+
+  public abstract Binary getBinary(int fieldIndex, int index);
+
+  public abstract Binary getInt96(int fieldIndex, int index);
+
+  public abstract String getValueToString(int fieldIndex, int index);
+
+  /** @return the group type used to resolve field names to indexes */
+  public abstract GroupType getType();
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/GroupWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/GroupWriter.java b/parquet-column/src/main/java/org/apache/parquet/example/data/GroupWriter.java
new file mode 100644
index 0000000..c67295c
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/GroupWriter.java
@@ -0,0 +1,62 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data;
+
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.Type;
+
+/**
+ * Writes {@link Group} records to a {@link RecordConsumer}, walking them field
+ * by field according to a {@link GroupType} schema.
+ */
+public class GroupWriter {
+
+  private final RecordConsumer recordConsumer;
+  private final GroupType schema;
+
+  public GroupWriter(RecordConsumer recordConsumer, GroupType schema) {
+    this.schema = schema;
+    this.recordConsumer = recordConsumer;
+  }
+
+  /** Emits the group as one message, bracketed by startMessage() and endMessage(). */
+  public void write(Group group) {
+    recordConsumer.startMessage();
+    writeGroup(group, schema);
+    recordConsumer.endMessage();
+  }
+
+  /**
+   * Emits every field of {@code group} that has at least one value. Primitive
+   * values are written by the group itself; nested groups are written recursively
+   * between startGroup() and endGroup().
+   */
+  private void writeGroup(Group group, GroupType type) {
+    final int fields = type.getFieldCount();
+    for (int i = 0; i < fields; i++) {
+      final int repetitions = group.getFieldRepetitionCount(i);
+      if (repetitions <= 0) {
+        // fields without values are not emitted at all
+        continue;
+      }
+      final Type fieldType = type.getType(i);
+      final String name = fieldType.getName();
+      recordConsumer.startField(name, i);
+      for (int r = 0; r < repetitions; r++) {
+        if (!fieldType.isPrimitive()) {
+          recordConsumer.startGroup();
+          writeGroup(group.getGroup(i, r), fieldType.asGroupType());
+          recordConsumer.endGroup();
+        } else {
+          group.writeValue(i, r, recordConsumer);
+        }
+      }
+      recordConsumer.endField(name, i);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BinaryValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BinaryValue.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BinaryValue.java
new file mode 100644
index 0000000..66a1ef5
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BinaryValue.java
@@ -0,0 +1,52 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+
+
+/** {@link Primitive} that wraps a {@link Binary}. */
+public class BinaryValue extends Primitive {
+
+  private final Binary binary;
+
+  public BinaryValue(Binary binary) {
+    this.binary = binary;
+  }
+
+  /** Same text as {@link #getString()}. */
+  @Override
+  public String toString() {
+    return getString();
+  }
+
+  /** Text form produced by {@code Binary.toStringUsingUTF8()}. */
+  @Override
+  public String getString() {
+    String text = binary.toStringUsingUTF8();
+    return text;
+  }
+
+  @Override
+  public Binary getBinary() {
+    return binary;
+  }
+
+  /** Passes the wrapped binary to {@code recordConsumer.addBinary}. */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addBinary(binary);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BooleanValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BooleanValue.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BooleanValue.java
new file mode 100644
index 0000000..b614c82
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/BooleanValue.java
@@ -0,0 +1,44 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.RecordConsumer;
+
+/** {@link Primitive} that wraps a {@code boolean}. */
+public class BooleanValue extends Primitive {
+
+  private final boolean bool;
+
+  public BooleanValue(boolean bool) {
+    this.bool = bool;
+  }
+
+  @Override
+  public boolean getBoolean() {
+    return bool;
+  }
+
+  /** Passes the wrapped value to {@code recordConsumer.addBoolean}. */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addBoolean(bool);
+  }
+
+  /** @return "true" or "false" */
+  @Override
+  public String toString() {
+    return Boolean.toString(bool);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/DoubleValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/DoubleValue.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/DoubleValue.java
new file mode 100644
index 0000000..6ec92da
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/DoubleValue.java
@@ -0,0 +1,45 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.RecordConsumer;
+
+/** {@link Primitive} that wraps a {@code double}. */
+public class DoubleValue extends Primitive {
+
+  private final double value;
+
+  public DoubleValue(double value) {
+    this.value = value;
+  }
+
+  /** @return the decimal text of the wrapped double */
+  @Override
+  public String toString() {
+    return Double.toString(value);
+  }
+
+  @Override
+  public double getDouble() {
+    return value;
+  }
+
+  /** Passes the wrapped value to {@code recordConsumer.addDouble}. */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addDouble(value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/FloatValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/FloatValue.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/FloatValue.java
new file mode 100644
index 0000000..ce589b3
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/FloatValue.java
@@ -0,0 +1,45 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.RecordConsumer;
+
+/** {@link Primitive} that wraps a {@code float}. */
+public class FloatValue extends Primitive {
+
+  private final float value;
+
+  public FloatValue(float value) {
+    this.value = value;
+  }
+
+  /** @return the decimal text of the wrapped float */
+  @Override
+  public String toString() {
+    return Float.toString(value);
+  }
+
+  @Override
+  public float getFloat() {
+    return value;
+  }
+
+  /** Passes the wrapped value to {@code recordConsumer.addFloat}. */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addFloat(value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Int96Value.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Int96Value.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Int96Value.java
new file mode 100644
index 0000000..da01294
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Int96Value.java
@@ -0,0 +1,46 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+
+/** {@link Primitive} that holds an INT96 value as a {@link Binary}. */
+public class Int96Value extends Primitive {
+
+  private final Binary value;
+
+  public Int96Value(Binary value) {
+    this.value = value;
+  }
+
+  /** @return the wrapped value's text inside {@code Int96Value{...}} */
+  @Override
+  public String toString() {
+    return "Int96Value{" + value + "}";
+  }
+
+  @Override
+  public Binary getInt96() {
+    return value;
+  }
+
+  /** Passes the wrapped binary to {@code recordConsumer.addBinary}. */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addBinary(value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/IntegerValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/IntegerValue.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/IntegerValue.java
new file mode 100644
index 0000000..f8c43b1
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/IntegerValue.java
@@ -0,0 +1,46 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.RecordConsumer;
+
+
+/**
+ * Value wrapper of the example data model that holds a single {@code int}.
+ */
+public class IntegerValue extends Primitive {
+
+  private final int value;
+
+  /**
+   * @param value the int to wrap
+   */
+  public IntegerValue(int value) {
+    this.value = value;
+  }
+
+  /**
+   * @return the wrapped int
+   */
+  @Override
+  public int getInteger() {
+    return this.value;
+  }
+
+  /**
+   * Hands the wrapped int to the consumer through {@code addInteger}.
+   *
+   * @param recordConsumer the consumer that receives the value
+   */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addInteger(this.value);
+  }
+
+  /**
+   * @return the decimal text form of the wrapped int
+   */
+  @Override
+  public String toString() {
+    return Integer.toString(this.value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/LongValue.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/LongValue.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/LongValue.java
new file mode 100644
index 0000000..e643f85
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/LongValue.java
@@ -0,0 +1,45 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.RecordConsumer;
+
+/**
+ * Value wrapper of the example data model that holds a single {@code long}.
+ */
+public class LongValue extends Primitive {
+
+  private final long value;
+
+  /**
+   * @param value the long to wrap
+   */
+  public LongValue(long value) {
+    this.value = value;
+  }
+
+  /**
+   * @return the wrapped long
+   */
+  @Override
+  public long getLong() {
+    return this.value;
+  }
+
+  /**
+   * Hands the wrapped long to the consumer through {@code addLong}.
+   *
+   * @param recordConsumer the consumer that receives the value
+   */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addLong(this.value);
+  }
+
+  /**
+   * @return the decimal text form of the wrapped long
+   */
+  @Override
+  public String toString() {
+    return Long.toString(this.value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/NanoTime.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/NanoTime.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/NanoTime.java
new file mode 100644
index 0000000..61eff42
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/NanoTime.java
@@ -0,0 +1,80 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+
+/**
+ * Value wrapper of the example data model made of a {@code julianDay} int and
+ * a {@code timeOfDayNanos} long.
+ *
+ * The binary form written by {@link #toBinary()} is 12 bytes, little-endian:
+ * the 8-byte {@code timeOfDayNanos} first, then the 4-byte {@code julianDay}.
+ * All decoders in this class read that same layout.
+ */
+public class NanoTime extends Primitive {
+  private final int julianDay;
+  private final long timeOfDayNanos;
+
+  /**
+   * Decodes the 12-byte layout written by {@link #toBinary()}.
+   *
+   * @param bytes the encoded value; its length is checked to be exactly 12
+   *              through {@code Preconditions.checkArgument}
+   * @return the decoded NanoTime
+   */
+  public static NanoTime fromBinary(Binary bytes) {
+    Preconditions.checkArgument(bytes.length() == 12, "Must be 12 bytes");
+    ByteBuffer buf = bytes.toByteBuffer();
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    long timeOfDayNanos = buf.getLong();
+    int julianDay = buf.getInt();
+    return new NanoTime(julianDay, timeOfDayNanos);
+  }
+
+  /**
+   * Decodes an {@link Int96Value}, such as one produced by {@link #toInt96()}.
+   *
+   * @param int96 the wrapper whose binary payload is decoded
+   * @return the decoded NanoTime
+   */
+  public static NanoTime fromInt96(Int96Value int96) {
+    // Share the decoder that mirrors toBinary(). Reading the int before the
+    // long, without selecting little-endian order, does not round-trip the
+    // bytes that toInt96() produces.
+    return fromBinary(int96.getInt96());
+  }
+
+  /**
+   * @param julianDay the day component
+   * @param timeOfDayNanos the time-of-day component
+   */
+  public NanoTime(int julianDay, long timeOfDayNanos) {
+    this.julianDay = julianDay;
+    this.timeOfDayNanos = timeOfDayNanos;
+  }
+
+  /**
+   * @return the day component given at construction
+   */
+  public int getJulianDay() {
+    return julianDay;
+  }
+
+  /**
+   * @return the time-of-day component given at construction
+   */
+  public long getTimeOfDayNanos() {
+    return timeOfDayNanos;
+  }
+
+  /**
+   * Encodes this value as 12 little-endian bytes: {@code timeOfDayNanos}
+   * (8 bytes) followed by {@code julianDay} (4 bytes).
+   *
+   * @return the encoded value
+   */
+  public Binary toBinary() {
+    ByteBuffer buf = ByteBuffer.allocate(12);
+    buf.order(ByteOrder.LITTLE_ENDIAN);
+    buf.putLong(timeOfDayNanos);
+    buf.putInt(julianDay);
+    // flip() makes the 12 bytes just written readable from position 0.
+    buf.flip();
+    return Binary.fromByteBuffer(buf);
+  }
+
+  /**
+   * @return an {@link Int96Value} wrapping the result of {@link #toBinary()}
+   */
+  public Int96Value toInt96() {
+    return new Int96Value(toBinary());
+  }
+
+  /**
+   * Hands the encoded form of this value to the consumer through {@code addBinary}.
+   *
+   * @param recordConsumer the consumer that receives the value
+   */
+  @Override
+  public void writeValue(RecordConsumer recordConsumer) {
+    recordConsumer.addBinary(toBinary());
+  }
+
+  @Override
+  public String toString() {
+    return "NanoTime{julianDay="+julianDay+", timeOfDayNanos="+timeOfDayNanos+"}";
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Primitive.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Primitive.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Primitive.java
new file mode 100644
index 0000000..68eb98b
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/Primitive.java
@@ -0,0 +1,60 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+
+/**
+ * Base class for the value wrappers of the example data model.
+ *
+ * Each typed getter below throws {@link UnsupportedOperationException} unless
+ * a subclass overrides it; subclasses must implement {@link #writeValue}.
+ */
+public abstract class Primitive {
+
+  /**
+   * Writes the wrapped value to the given consumer.
+   *
+   * @param recordConsumer the consumer that receives the value
+   */
+  public abstract void writeValue(RecordConsumer recordConsumer);
+
+  /** Not supported by this base class; always throws. */
+  public boolean getBoolean() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by this base class; always throws. */
+  public int getInteger() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by this base class; always throws. */
+  public long getLong() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by this base class; always throws. */
+  public float getFloat() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by this base class; always throws. */
+  public double getDouble() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by this base class; always throws. */
+  public String getString() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by this base class; always throws. */
+  public Binary getBinary() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported by this base class; always throws. */
+  public Binary getInt96() {
+    throw new UnsupportedOperationException();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroup.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroup.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroup.java
new file mode 100644
index 0000000..2e58edc
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroup.java
@@ -0,0 +1,232 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.Type;
+
+
+/**
+ * List-backed {@link Group} implementation of the example data model.
+ *
+ * Every field of the schema owns one list of values. A stored value is either
+ * a {@link Primitive} wrapper or a nested {@link Group}.
+ */
+public class SimpleGroup extends Group {
+
+  private final GroupType schema;
+  private final List<Object>[] data;
+
+  /**
+   * Creates a group with one empty value list per schema field.
+   *
+   * @param schema the group type describing the fields of this group
+   */
+  @SuppressWarnings("unchecked")
+  public SimpleGroup(GroupType schema) {
+    this.schema = schema;
+    this.data = new List[schema.getFields().size()];
+    for (int i = 0; i < schema.getFieldCount(); i++) {
+      this.data[i] = new ArrayList<Object>();
+    }
+  }
+
+  /**
+   * @return the text form produced by {@link #toString(String)} with an empty indent
+   */
+  @Override
+  public String toString() {
+    return toString("");
+  }
+
+  /**
+   * Renders every stored value, in schema field order. A primitive is written
+   * as {@code indent + name + ": " + value} on one line; a nested SimpleGroup
+   * is written as {@code indent + name}, followed on the next lines by the
+   * nested group rendered with two more spaces of indent.
+   *
+   * @param indent the prefix written in front of each field name
+   * @return the multi-line text form; empty when no value is stored
+   */
+  public String toString(String indent) {
+    // A builder avoids re-copying the accumulated text for every value.
+    StringBuilder result = new StringBuilder();
+    int i = 0;
+    for (Type field : schema.getFields()) {
+      String name = field.getName();
+      List<Object> values = data[i];
+      ++i;
+      if (values == null) {
+        continue;
+      }
+      for (Object value : values) {
+        result.append(indent).append(name);
+        if (value == null) {
+          result.append(": NULL\n");
+        } else if (value instanceof SimpleGroup) {
+          // Only SimpleGroup offers toString(indent), so test for exactly that
+          // type before casting.
+          result.append("\n").append(((SimpleGroup) value).toString(indent + "  "));
+        } else {
+          // Primitives, and any other Group implementation, use their own toString().
+          result.append(": ").append(value.toString()).append("\n");
+        }
+      }
+    }
+    return result.toString();
+  }
+
+  /**
+   * Creates a new nested group for the given field and stores it.
+   *
+   * @param fieldIndex index of a group-typed field in the schema
+   * @return the newly created, empty nested group
+   */
+  @Override
+  public Group addGroup(int fieldIndex) {
+    SimpleGroup g = new SimpleGroup(schema.getType(fieldIndex).asGroupType());
+    add(fieldIndex, g);
+    return g;
+  }
+
+  /**
+   * @param fieldIndex index of the field in the schema
+   * @param index position of the value within that field's list
+   * @return the stored value, cast to {@link Group}
+   */
+  @Override
+  public Group getGroup(int fieldIndex, int index) {
+    return (Group)getValue(fieldIndex, index);
+  }
+
+  /**
+   * Looks up one stored value.
+   *
+   * @param fieldIndex index of the field in the schema
+   * @param index position of the value within that field's list
+   * @return the stored object
+   * @throws RuntimeException if either index is out of range; the original
+   *         IndexOutOfBoundsException is kept as the cause
+   */
+  private Object getValue(int fieldIndex, int index) {
+    List<Object> list;
+    try {
+      list = data[fieldIndex];
+    } catch (IndexOutOfBoundsException e) {
+      throw new RuntimeException("not found " + fieldIndex + "(" + schema.getFieldName(fieldIndex) + ") in group:\n" + this, e);
+    }
+    try {
+      return list.get(index);
+    } catch (IndexOutOfBoundsException e) {
+      throw new RuntimeException("not found " + fieldIndex + "(" + schema.getFieldName(fieldIndex) + ") element number " + index + " in group:\n" + this, e);
+    }
+  }
+
+  /**
+   * Stores a primitive wrapper, refusing a second value for a field that is
+   * not REPEATED.
+   *
+   * @param fieldIndex index of the field in the schema
+   * @param value the wrapper to store
+   * @throws IllegalStateException if the field is not repeated and already holds a value
+   */
+  private void add(int fieldIndex, Primitive value) {
+    Type type = schema.getType(fieldIndex);
+    List<Object> list = data[fieldIndex];
+    if (!type.isRepetition(Type.Repetition.REPEATED)
+        && !list.isEmpty()) {
+      throw new IllegalStateException("field "+fieldIndex+" (" + type.getName() + ") can not have more than one value: " + list);
+    }
+    list.add(value);
+  }
+
+  /**
+   * @param fieldIndex index of the field in the schema
+   * @return the number of values currently stored for that field
+   */
+  @Override
+  public int getFieldRepetitionCount(int fieldIndex) {
+    List<Object> list = data[fieldIndex];
+    return list == null ? 0 : list.size();
+  }
+
+  /**
+   * @return {@code String.valueOf} of the stored value
+   */
+  @Override
+  public String getValueToString(int fieldIndex, int index) {
+    return String.valueOf(getValue(fieldIndex, index));
+  }
+
+  // The typed getters below cast the stored wrapper to the expected class, so
+  // asking for the wrong type fails with a ClassCastException.
+
+  @Override
+  public String getString(int fieldIndex, int index) {
+    return ((BinaryValue)getValue(fieldIndex, index)).getString();
+  }
+
+  @Override
+  public int getInteger(int fieldIndex, int index) {
+    return ((IntegerValue)getValue(fieldIndex, index)).getInteger();
+  }
+
+  @Override
+  public long getLong(int fieldIndex, int index) {
+    return ((LongValue)getValue(fieldIndex, index)).getLong();
+  }
+
+  @Override
+  public double getDouble(int fieldIndex, int index) {
+    return ((DoubleValue)getValue(fieldIndex, index)).getDouble();
+  }
+
+  @Override
+  public float getFloat(int fieldIndex, int index) {
+    return ((FloatValue)getValue(fieldIndex, index)).getFloat();
+  }
+
+  @Override
+  public boolean getBoolean(int fieldIndex, int index) {
+    return ((BooleanValue)getValue(fieldIndex, index)).getBoolean();
+  }
+
+  @Override
+  public Binary getBinary(int fieldIndex, int index) {
+    return ((BinaryValue)getValue(fieldIndex, index)).getBinary();
+  }
+
+  /**
+   * Reads a stored INT96 value back as a {@link NanoTime}.
+   *
+   * @param fieldIndex index of the field in the schema
+   * @param index position of the value within that field's list
+   * @return the decoded NanoTime
+   */
+  public NanoTime getTimeNanos(int fieldIndex, int index) {
+    // add(int, NanoTime) stores value.toInt96(), i.e. the bytes written by
+    // NanoTime.toBinary(); fromBinary is the decoder that reads that layout.
+    return NanoTime.fromBinary(getInt96(fieldIndex, index));
+  }
+
+  @Override
+  public Binary getInt96(int fieldIndex, int index) {
+    return ((Int96Value)getValue(fieldIndex, index)).getInt96();
+  }
+
+  // The typed adders below wrap the value and go through add(int, Primitive),
+  // which enforces the one-value limit on fields that are not REPEATED.
+
+  @Override
+  public void add(int fieldIndex, int value) {
+    add(fieldIndex, new IntegerValue(value));
+  }
+
+  @Override
+  public void add(int fieldIndex, long value) {
+    add(fieldIndex, new LongValue(value));
+  }
+
+  @Override
+  public void add(int fieldIndex, String value) {
+    add(fieldIndex, new BinaryValue(Binary.fromString(value)));
+  }
+
+  @Override
+  public void add(int fieldIndex, NanoTime value) {
+    add(fieldIndex, value.toInt96());
+  }
+
+  @Override
+  public void add(int fieldIndex, boolean value) {
+    add(fieldIndex, new BooleanValue(value));
+  }
+
+  /**
+   * Stores a binary value, wrapped according to the primitive type of the field:
+   * BINARY and FIXED_LEN_BYTE_ARRAY become a BinaryValue, INT96 an Int96Value.
+   *
+   * @param fieldIndex index of a primitive field in the schema
+   * @param value the binary to store
+   * @throws UnsupportedOperationException if the field has any other primitive type
+   */
+  @Override
+  public void add(int fieldIndex, Binary value) {
+    Type fieldType = getType().getType(fieldIndex);
+    switch (fieldType.asPrimitiveType().getPrimitiveTypeName()) {
+      case BINARY:
+      case FIXED_LEN_BYTE_ARRAY:
+        add(fieldIndex, new BinaryValue(value));
+        break;
+      case INT96:
+        add(fieldIndex, new Int96Value(value));
+        break;
+      default:
+        // Report the offending field. getType() alone is this group's own
+        // schema, which is not a primitive type.
+        throw new UnsupportedOperationException(
+            fieldType.getName() + " not supported for Binary");
+    }
+  }
+
+  @Override
+  public void add(int fieldIndex, float value) {
+    add(fieldIndex, new FloatValue(value));
+  }
+
+  @Override
+  public void add(int fieldIndex, double value) {
+    add(fieldIndex, new DoubleValue(value));
+  }
+
+  /**
+   * Appends a nested group to the field's list. Unlike the primitive adders,
+   * this does not check the field's repetition.
+   */
+  @Override
+  public void add(int fieldIndex, Group value) {
+    data[fieldIndex].add(value);
+  }
+
+  /**
+   * @return the schema this group was created with
+   */
+  @Override
+  public GroupType getType() {
+    return schema;
+  }
+
+  /**
+   * Writes one stored primitive to the consumer.
+   *
+   * @param field index of the field in the schema
+   * @param index position of the value within that field's list
+   * @param recordConsumer the consumer that receives the value
+   */
+  @Override
+  public void writeValue(int field, int index, RecordConsumer recordConsumer) {
+    ((Primitive)getValue(field, index)).writeValue(recordConsumer);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroupFactory.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroupFactory.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroupFactory.java
new file mode 100644
index 0000000..e4df89f
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/SimpleGroupFactory.java
@@ -0,0 +1,38 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple;
+
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.GroupFactory;
+import org.apache.parquet.schema.MessageType;
+
+/**
+ * {@link GroupFactory} that creates {@link SimpleGroup} instances for one
+ * fixed message schema.
+ */
+public class SimpleGroupFactory extends GroupFactory {
+
+  private final MessageType schema;
+
+  /**
+   * @param schema the message type handed to every group this factory creates
+   */
+  public SimpleGroupFactory(MessageType schema) {
+    this.schema = schema;
+  }
+
+  /**
+   * @return a new, empty SimpleGroup built on this factory's schema
+   */
+  @Override
+  public Group newGroup() {
+    final Group created = new SimpleGroup(this.schema);
+    return created;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/GroupRecordConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/GroupRecordConverter.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/GroupRecordConverter.java
new file mode 100644
index 0000000..2edb9ce
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/GroupRecordConverter.java
@@ -0,0 +1,57 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple.convert;
+
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.simple.SimpleGroupFactory;
+import org.apache.parquet.io.api.GroupConverter;
+import org.apache.parquet.io.api.RecordMaterializer;
+import org.apache.parquet.schema.MessageType;
+
+/**
+ * {@link RecordMaterializer} that assembles each record as a {@link Group}
+ * obtained from a {@link SimpleGroupFactory}.
+ */
+public class GroupRecordConverter extends RecordMaterializer<Group> {
+
+  private final SimpleGroupFactory simpleGroupFactory;
+
+  private final SimpleGroupConverter root;
+
+  /**
+   * @param schema the message type of the records to assemble
+   */
+  public GroupRecordConverter(MessageType schema) {
+    simpleGroupFactory = new SimpleGroupFactory(schema);
+    // The root converter is created without a parent, so its start() takes
+    // each new record from the factory rather than from a parent record.
+    root = new SimpleGroupConverter(null, 0, schema) {
+      @Override
+      public void end() {
+      }
+
+      @Override
+      public void start() {
+        this.current = simpleGroupFactory.newGroup();
+      }
+    };
+  }
+
+  /**
+   * @return the root converter that receives the fields of each record
+   */
+  @Override
+  public GroupConverter getRootConverter() {
+    return this.root;
+  }
+
+  /**
+   * @return the group currently held by the root converter
+   */
+  @Override
+  public Group getCurrentRecord() {
+    return this.root.getCurrentRecord();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimpleGroupConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimpleGroupConverter.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimpleGroupConverter.java
new file mode 100644
index 0000000..09ce971
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimpleGroupConverter.java
@@ -0,0 +1,67 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple.convert;
+
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.io.api.Converter;
+import org.apache.parquet.io.api.GroupConverter;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.Type;
+
+/**
+ * {@link GroupConverter} that fills a {@link Group}, with one child converter
+ * per field of the group's schema.
+ */
+class SimpleGroupConverter extends GroupConverter {
+  private final SimpleGroupConverter parent;
+  private final int index;
+  protected Group current;
+  private Converter[] converters;
+
+  /**
+   * Builds this converter and, recursively, one converter per schema field.
+   *
+   * @param parent the converter of the enclosing group
+   * @param index the index of this group's field within the parent's schema
+   * @param schema the type of the group this converter fills
+   */
+  SimpleGroupConverter(SimpleGroupConverter parent, int index, GroupType schema) {
+    this.parent = parent;
+    this.index = index;
+    this.converters = new Converter[schema.getFieldCount()];
+
+    for (int field = 0; field < this.converters.length; field++) {
+      final Type fieldType = schema.getType(field);
+      final Converter child;
+      if (!fieldType.isPrimitive()) {
+        child = new SimpleGroupConverter(this, field, fieldType.asGroupType());
+      } else {
+        child = new SimplePrimitiveConverter(this, field);
+      }
+      this.converters[field] = child;
+    }
+  }
+
+  /**
+   * @return the group most recently assigned to {@code current}
+   */
+  public Group getCurrentRecord() {
+    return this.current;
+  }
+
+  /**
+   * @param fieldIndex index of the field in this group's schema
+   * @return the converter created for that field
+   */
+  @Override
+  public Converter getConverter(int fieldIndex) {
+    return this.converters[fieldIndex];
+  }
+
+  /**
+   * Starts a new nested group by adding one to the parent's current record.
+   */
+  @Override
+  public void start() {
+    final Group enclosing = this.parent.getCurrentRecord();
+    this.current = enclosing.addGroup(this.index);
+  }
+
+  /**
+   * Nothing to do when the group ends.
+   */
+  @Override
+  public void end() {
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimplePrimitiveConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimplePrimitiveConverter.java b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimplePrimitiveConverter.java
new file mode 100644
index 0000000..6815c1d
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/example/data/simple/convert/SimplePrimitiveConverter.java
@@ -0,0 +1,88 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.example.data.simple.convert;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.PrimitiveConverter;
+
+/**
+ * {@link PrimitiveConverter} that forwards every received value to the current
+ * record of its parent group converter, at a fixed field index.
+ */
+class SimplePrimitiveConverter extends PrimitiveConverter {
+
+  private final SimpleGroupConverter parent;
+  private final int index;
+
+  /**
+   * @param parent the converter whose current record receives the values
+   * @param index the field index the values are added under
+   */
+  SimplePrimitiveConverter(SimpleGroupConverter parent, int index) {
+    this.parent = parent;
+    this.index = index;
+  }
+
+  /**
+   * Adds a boolean to the parent's current record.
+   *
+   * @see org.apache.parquet.io.api.PrimitiveConverter#addBoolean(boolean)
+   */
+  @Override
+  public void addBoolean(boolean value) {
+    this.parent.getCurrentRecord().add(this.index, value);
+  }
+
+  /**
+   * Adds an int to the parent's current record.
+   *
+   * @see org.apache.parquet.io.api.PrimitiveConverter#addInt(int)
+   */
+  @Override
+  public void addInt(int value) {
+    this.parent.getCurrentRecord().add(this.index, value);
+  }
+
+  /**
+   * Adds a long to the parent's current record.
+   *
+   * @see org.apache.parquet.io.api.PrimitiveConverter#addLong(long)
+   */
+  @Override
+  public void addLong(long value) {
+    this.parent.getCurrentRecord().add(this.index, value);
+  }
+
+  /**
+   * Adds a float to the parent's current record.
+   *
+   * @see org.apache.parquet.io.api.PrimitiveConverter#addFloat(float)
+   */
+  @Override
+  public void addFloat(float value) {
+    this.parent.getCurrentRecord().add(this.index, value);
+  }
+
+  /**
+   * Adds a double to the parent's current record.
+   *
+   * @see org.apache.parquet.io.api.PrimitiveConverter#addDouble(double)
+   */
+  @Override
+  public void addDouble(double value) {
+    this.parent.getCurrentRecord().add(this.index, value);
+  }
+
+  /**
+   * Adds a binary to the parent's current record.
+   *
+   * @see org.apache.parquet.io.api.PrimitiveConverter#addBinary(Binary)
+   */
+  @Override
+  public void addBinary(Binary value) {
+    this.parent.getCurrentRecord().add(this.index, value);
+  }
+
+}


Mime
View raw message