parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [33/51] [partial] parquet-mr git commit: PARQUET-23: Rename to org.apache.parquet.
Date Mon, 27 Apr 2015 23:12:30 GMT
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesReader.java b/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesReader.java
deleted file mode 100644
index b283e9b..0000000
--- a/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesReader.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.boundedint;
-
-import static parquet.Log.DEBUG;
-
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.bytes.BytesUtils;
-import parquet.column.values.ValuesReader;
-import parquet.io.ParquetDecodingException;
-
-/**
- * @see BoundedIntValuesWriter
- */
-class BoundedIntValuesReader extends ValuesReader {
-  private static final Log LOG = Log.getLog(BoundedIntValuesReader.class);
-
-  private int currentValueCt = 0;
-  private int currentValue = 0;
-  private final int bitsPerValue;
-  private BitReader bitReader = new BitReader();
-  private int nextOffset;
-
-  public BoundedIntValuesReader(int bound) {
-    if (bound == 0) {
-      throw new ParquetDecodingException("Value bound cannot be 0. Use DevNullColumnReader instead.");
-    }
-    bitsPerValue = BytesUtils.getWidthFromMaxInt(bound);
-  }
-
-  @Override
-  public int readInteger() {
-    try {
-      if (currentValueCt > 0) {
-        currentValueCt--;
-        return currentValue;
-      }
-      if (bitReader.readBit()) {
-        currentValue = bitReader.readNBitInteger(bitsPerValue);
-        currentValueCt = bitReader.readUnsignedVarint() - 1;
-      } else {
-        currentValue = bitReader.readNBitInteger(bitsPerValue);
-      }
-      return currentValue;
-    } catch (IOException e) {
-      throw new ParquetDecodingException("could not read int", e);
-    }
-  }
-
-  // This forces it to deserialize into memory. If it wanted
-  // to, it could just read the bytes (though that number of
-  // bytes would have to be serialized). This is the flip-side
-  // to BoundedIntColumnWriter.writeData(BytesOutput)
-  @Override
-  public void initFromPage(int valueCount, byte[] in, int offset) throws IOException {
-    if (DEBUG) LOG.debug("reading size at "+ offset + ": " + in[offset] + " " + in[offset + 1] + " " + in[offset + 2] + " " + in[offset + 3] + " ");
-    int totalBytes = BytesUtils.readIntLittleEndian(in, offset);
-    if (DEBUG) LOG.debug("will read "+ totalBytes + " bytes");
-    currentValueCt = 0;
-    currentValue = 0;
-    bitReader.prepare(in, offset + 4, totalBytes);
-    if (DEBUG) LOG.debug("will read next from " + (offset + totalBytes + 4));
-    this.nextOffset = offset + totalBytes + 4;
-  }
-  
-  @Override
-  public int getNextOffset() {
-    return this.nextOffset;
-  }
-
-  @Override
-  public void skip() {
-    readInteger();
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesWriter.java
deleted file mode 100644
index 9e52189..0000000
--- a/parquet-column/src/main/java/parquet/column/values/boundedint/BoundedIntValuesWriter.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.boundedint;
-
-import static parquet.bytes.BytesInput.concat;
-import static parquet.column.Encoding.RLE;
-import parquet.Log;
-import parquet.bytes.BytesInput;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.column.values.bitpacking.BitPackingValuesWriter;
-import parquet.io.ParquetEncodingException;
-
-/**
- * This is a special ColumnWriter for the case when you need to write
- * integers in a known range. This is intended primarily for use with
- * repetition and definition levels, since the maximum value that will
- * be written is known a priori based on the schema. Assumption is that
- * the values written are between 0 and the bound, inclusive.
- *
- * This differs from {@link BitPackingValuesWriter} in that this also performs
- * run-length encoding of the data, so is useful when long runs of repeated
- * values are expected.
- */
-class BoundedIntValuesWriter extends ValuesWriter {
-  private static final Log LOG = Log.getLog(BoundedIntValuesWriter.class);
-
-  private int currentValue = -1;
-  private int currentValueCt = -1;
-  private boolean currentValueIsRepeated = false;
-  private boolean thereIsABufferedValue = false;
-  private int shouldRepeatThreshold = 0;
-  private int bitsPerValue;
-  private BitWriter bitWriter;
-  private boolean isFirst = true;
-
-  private static final int[] byteToTrueMask = new int[8];
-  static {
-    int currentMask = 1;
-    for (int i = 0; i < byteToTrueMask.length; i++) {
-      byteToTrueMask[i] = currentMask;
-      currentMask <<= 1;
-    }
-  }
-
-  public BoundedIntValuesWriter(int bound, int initialCapacity, int pageSize) {
-    if (bound == 0) {
-      throw new ParquetEncodingException("Value bound cannot be 0. Use DevNullColumnWriter instead.");
-    }
-    this.bitWriter = new BitWriter(initialCapacity, pageSize);
-    bitsPerValue = (int)Math.ceil(Math.log(bound + 1)/Math.log(2));
-    shouldRepeatThreshold = (bitsPerValue + 9)/(1 + bitsPerValue);
-    if (Log.DEBUG) LOG.debug("init column with bit width of " + bitsPerValue + " and repeat threshold of " + shouldRepeatThreshold);
-  }
-
-  @Override
-  public long getBufferedSize() {
-    // currentValue + currentValueCt = 8 bytes
-    // shouldRepeatThreshold + bitsPerValue = 8 bytes
-    // bitWriter = 8 bytes
-    // currentValueIsRepeated + isFirst = 2 bytes (rounded to 8 b/c of word boundaries)
-    return 32 + (bitWriter == null ? 0 : bitWriter.getMemSize());
-  }
-
-  // This assumes that the full state must be serialized, since there is no close method
-  @Override
-  public BytesInput getBytes() {
-    serializeCurrentValue();
-    BytesInput buf = bitWriter.finish();
-    if (Log.DEBUG) LOG.debug("writing a buffer of size " + buf.size() + " + 4 bytes");
-    // We serialize the length so that on deserialization we can
-    // deserialize as we go, instead of having to load everything
-    // into memory
-    return concat(BytesInput.fromInt((int)buf.size()), buf);
-  }
-
-  @Override
-  public void reset() {
-    currentValue = -1;
-    currentValueCt = -1;
-    currentValueIsRepeated = false;
-    thereIsABufferedValue = false;
-    isFirst = true;
-    bitWriter.reset();
-  }
-
-  @Override
-  public void writeInteger(int val) {
-    if (currentValue == val) {
-      currentValueCt++;
-      if (!currentValueIsRepeated && currentValueCt >= shouldRepeatThreshold) {
-        currentValueIsRepeated = true;
-      }
-    } else {
-      if (!isFirst) {
-        serializeCurrentValue();
-      } else {
-        isFirst = false;
-      }
-
-      newCurrentValue(val);
-    }
-  }
-
-  private void serializeCurrentValue() {
-    if (thereIsABufferedValue) {
-      if (currentValueIsRepeated) {
-        bitWriter.writeBit(true);
-        bitWriter.writeNBitInteger(currentValue, bitsPerValue);
-        bitWriter.writeUnsignedVarint(currentValueCt);
-      } else {
-        for (int i = 0; i < currentValueCt; i++) {
-          bitWriter.writeBit(false);
-          bitWriter.writeNBitInteger(currentValue, bitsPerValue);
-        }
-      }
-    }
-    thereIsABufferedValue = false;
-  }
-
-  private void newCurrentValue(int val) {
-    currentValue = val;
-    currentValueCt = 1;
-    currentValueIsRepeated = false;
-    thereIsABufferedValue = true;
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    return bitWriter.getCapacity();
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return RLE;
-  }
-
-  @Override
-  public String memUsageString(String prefix) {
-    return bitWriter.memUsageString(prefix);
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/boundedint/DevNullValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/boundedint/DevNullValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/boundedint/DevNullValuesWriter.java
deleted file mode 100644
index 2725b30..0000000
--- a/parquet-column/src/main/java/parquet/column/values/boundedint/DevNullValuesWriter.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.boundedint;
-
-import static parquet.column.Encoding.BIT_PACKED;
-import parquet.bytes.BytesInput;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.io.api.Binary;
-
-/**
- * This is a special writer that doesn't write anything. The idea being that
- * some columns will always be the same value, and this will capture that. An
- * example is the set of repetition levels for a schema with no repeated fields.
- */
-public class DevNullValuesWriter extends ValuesWriter {
-  @Override
-  public long getBufferedSize() {
-    return 0;
-  }
-
-  @Override
-  public void reset() {
-  }
-
-  @Override
-  public void writeInteger(int v) {
-  }
-
-  @Override
-  public void writeByte(int value) {
-  }
-
-  @Override
-  public void writeBoolean(boolean v) {
-  }
-
-  @Override
-  public void writeBytes(Binary v) {
-  }
-
-  @Override
-  public void writeLong(long v) {
-  }
-
-  @Override
-  public void writeDouble(double v) {
-  }
-
-  @Override
-  public void writeFloat(float v) {
-  }
-
-  @Override
-  public BytesInput getBytes() {
-    return BytesInput.empty();
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    return 0;
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return BIT_PACKED;
-  }
-
-  @Override
-  public String memUsageString(String prefix) {
-    return prefix + "0";
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/boundedint/ZeroIntegerValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/boundedint/ZeroIntegerValuesReader.java b/parquet-column/src/main/java/parquet/column/values/boundedint/ZeroIntegerValuesReader.java
deleted file mode 100644
index ab6d71c..0000000
--- a/parquet-column/src/main/java/parquet/column/values/boundedint/ZeroIntegerValuesReader.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.boundedint;
-
-import java.io.IOException;
-
-import parquet.column.values.ValuesReader;
-
-/**
- * ColumnReader which does not read any actual data, but rather simply produces
- * an endless stream of constant values.
- * Mainly used to read definition levels when the only possible value is 0
- */
-public class ZeroIntegerValuesReader extends ValuesReader {
-  
-  private int nextOffset;
-
-  public int readInteger() {
-    return 0;
-  }
-
-  @Override
-  public void initFromPage(int valueCount, byte[] in, int offset) throws IOException {
-    this.nextOffset = offset;
-  }
-  
-  @Override
-  public int getNextOffset() {
-    return nextOffset;
-  }
-
-  @Override
-  public void skip() {
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingConfig.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingConfig.java b/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingConfig.java
deleted file mode 100644
index 260dec5..0000000
--- a/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingConfig.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.delta;
-
-
-import parquet.Preconditions;
-import parquet.bytes.BytesInput;
-import parquet.bytes.BytesUtils;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * Config for delta binary packing
- *
- * @author Tianshuo Deng
- */
-class DeltaBinaryPackingConfig {
-  final int blockSizeInValues;
-  final int miniBlockNumInABlock;
-  final int miniBlockSizeInValues;
-
-  public DeltaBinaryPackingConfig(int blockSizeInValues, int miniBlockNumInABlock) {
-    this.blockSizeInValues = blockSizeInValues;
-    this.miniBlockNumInABlock = miniBlockNumInABlock;
-    double miniSize = (double) blockSizeInValues / miniBlockNumInABlock;
-    Preconditions.checkArgument(miniSize % 8 == 0, "miniBlockSize must be multiple of 8, but it's " + miniSize);
-    this.miniBlockSizeInValues = (int) miniSize;
-  }
-
-  public static DeltaBinaryPackingConfig readConfig(InputStream in) throws IOException {
-    return new DeltaBinaryPackingConfig(BytesUtils.readUnsignedVarInt(in),
-            BytesUtils.readUnsignedVarInt(in));
-  }
-
-  public BytesInput toBytesInput() {
-    return BytesInput.concat(
-            BytesInput.fromUnsignedVarInt(blockSizeInValues),
-            BytesInput.fromUnsignedVarInt(miniBlockNumInABlock));
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java b/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java
deleted file mode 100644
index 9eafc18..0000000
--- a/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java
+++ /dev/null
@@ -1,170 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.delta;
-
-
-import parquet.bytes.BytesUtils;
-import parquet.column.values.ValuesReader;
-import parquet.column.values.bitpacking.BytePacker;
-import parquet.column.values.bitpacking.Packer;
-import parquet.io.ParquetDecodingException;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-
-/**
- * Read values written by {@link DeltaBinaryPackingValuesWriter}
- *
- * @author Tianshuo Deng
- */
-public class DeltaBinaryPackingValuesReader extends ValuesReader {
-  private int totalValueCount;
-  /**
-   * values read by the caller
-   */
-  private int valuesRead;
-  private int minDeltaInCurrentBlock;
-  private byte[] page;
-  /**
-   * stores the decoded values including the first value which is written to the header
-   */
-  private int[] valuesBuffer;
-  /**
-   * values loaded to the buffer, it could be bigger than the totalValueCount
-   * when data is not aligned to mini block, which means padding 0s are in the buffer
-   */
-  private int valuesBuffered;
-  private ByteArrayInputStream in;
-  private int nextOffset;
-  private DeltaBinaryPackingConfig config;
-  private int[] bitWidths;
-
-  /**
-   * eagerly load all the data into memory
-   *
-   * @param valueCount count of values in this page
-   * @param page       the array to read from containing the page data (repetition levels, definition levels, data)
-   * @param offset     where to start reading from in the page
-   * @throws IOException
-   */
-  @Override
-  public void initFromPage(int valueCount, byte[] page, int offset) throws IOException {
-    in = new ByteArrayInputStream(page, offset, page.length - offset);
-    this.config = DeltaBinaryPackingConfig.readConfig(in);
-    this.page = page;
-    this.totalValueCount = BytesUtils.readUnsignedVarInt(in);
-    allocateValuesBuffer();
-    bitWidths = new int[config.miniBlockNumInABlock];
-
-    //read first value from header
-    valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarInt(in);
-
-    while (valuesBuffered < totalValueCount) { //values Buffered could be more than totalValueCount, since we flush on a mini block basis
-      loadNewBlockToBuffer();
-    }
-    this.nextOffset = page.length - in.available();
-  }
-  
-  @Override
-  public int getNextOffset() {
-    return nextOffset;
-  }
-  
-  /**
-   * the value buffer is allocated so that the size of it is multiple of mini block
-   * because when writing, data is flushed on a mini block basis
-   */
-  private void allocateValuesBuffer() {
-    int totalMiniBlockCount = (int) Math.ceil((double) totalValueCount / config.miniBlockSizeInValues);
-    //+ 1 because first value written to header is also stored in values buffer
-    valuesBuffer = new int[totalMiniBlockCount * config.miniBlockSizeInValues + 1];
-  }
-
-  @Override
-  public void skip() {
-    checkRead();
-    valuesRead++;
-  }
-
-  @Override
-  public int readInteger() {
-    checkRead();
-    return valuesBuffer[valuesRead++];
-  }
-
-  private void checkRead() {
-    if (valuesRead >= totalValueCount) {
-      throw new ParquetDecodingException("no more value to read, total value count is " + totalValueCount);
-    }
-  }
-
-  private void loadNewBlockToBuffer() {
-    try {
-      minDeltaInCurrentBlock = BytesUtils.readZigZagVarInt(in);
-    } catch (IOException e) {
-      throw new ParquetDecodingException("can not read min delta in current block", e);
-    }
-
-    readBitWidthsForMiniBlocks();
-
-    // mini block is atomic for reading, we read a mini block when there are more values left
-    int i;
-    for (i = 0; i < config.miniBlockNumInABlock && valuesBuffered < totalValueCount; i++) {
-      BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidths[i]);
-      unpackMiniBlock(packer);
-    }
-
-    //calculate values from deltas unpacked for current block
-    int valueUnpacked=i*config.miniBlockSizeInValues;
-    for (int j = valuesBuffered-valueUnpacked; j < valuesBuffered; j++) {
-      int index = j;
-      valuesBuffer[index] += minDeltaInCurrentBlock + valuesBuffer[index - 1];
-    }
-  }
-
-  /**
-   * mini block has a size of 8*n, unpack 8 value each time
-   *
-   * @param packer the packer created from bitwidth of current mini block
-   */
-  private void unpackMiniBlock(BytePacker packer) {
-    for (int j = 0; j < config.miniBlockSizeInValues; j += 8) {
-      unpack8Values(packer);
-    }
-  }
-
-  private void unpack8Values(BytePacker packer) {
-    //calculate the pos because the packer api uses array not stream
-    int pos = page.length - in.available();
-    packer.unpack8Values(page, pos, valuesBuffer, valuesBuffered);
-    this.valuesBuffered += 8;
-    //sync the pos in stream
-    in.skip(packer.getBitWidth());
-  }
-
-  private void readBitWidthsForMiniBlocks() {
-    for (int i = 0; i < config.miniBlockNumInABlock; i++) {
-      try {
-        bitWidths[i] = BytesUtils.readIntLittleEndianOnOneByte(in);
-      } catch (IOException e) {
-        throw new ParquetDecodingException("Can not decode bitwidth in block header", e);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesWriter.java
deleted file mode 100644
index e2fa25a..0000000
--- a/parquet-column/src/main/java/parquet/column/values/delta/DeltaBinaryPackingValuesWriter.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.delta;
-
-import parquet.bytes.BytesInput;
-import parquet.bytes.BytesUtils;
-import parquet.bytes.CapacityByteArrayOutputStream;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.column.values.bitpacking.BytePacker;
-import parquet.column.values.bitpacking.Packer;
-import parquet.io.ParquetEncodingException;
-
-import java.io.IOException;
-
-/**
- * Write integers with delta encoding and binary packing
- * The format is as follows:
- * <p/>
- * <pre>
- *   {@code
- *     delta-binary-packing: <page-header> <block>*
- *     page-header := <block size in values> <number of miniblocks in a block> <total value count> <first value>
- *     block := <min delta> <list of bitwidths of miniblocks> <miniblocks>
- *
- *     min delta : zig-zag var int encoded
- *     bitWidthsOfMiniBlock : 1 byte little endian
- *     blockSizeInValues,blockSizeInValues,totalValueCount,firstValue : unsigned varint
- *   }
- * </pre>
- *
- * The algorithm and format is inspired by D. Lemire's paper: http://lemire.me/blog/archives/2012/09/12/fast-integer-compression-decoding-billions-of-integers-per-second/
- *
- * @author Tianshuo Deng
- */
-public class DeltaBinaryPackingValuesWriter extends ValuesWriter {
-  /**
-   * max bitwidth for a mini block, it is used to allocate miniBlockByteBuffer which is
-   * reused between flushes.
-   */
-  public static final int MAX_BITWIDTH = 32;
-
-  public static final int DEFAULT_NUM_BLOCK_VALUES = 128;
-
-  public static final int DEFAULT_NUM_MINIBLOCKS = 4;
-
-  private final CapacityByteArrayOutputStream baos;
-
-  /**
-   * stores blockSizeInValues, miniBlockNumInABlock and miniBlockSizeInValues
-   */
-  private final DeltaBinaryPackingConfig config;
-
-  /**
-   * bit width for each mini block, reused between flushes
-   */
-  private final int[] bitWidths;
-
-  private int totalValueCount = 0;
-
-  /**
-   * a pointer to deltaBlockBuffer indicating the end of deltaBlockBuffer
-   * the number of values in the deltaBlockBuffer that haven't flushed to baos
-   * it will be reset after each flush
-   */
-  private int deltaValuesToFlush = 0;
-
-  /**
-   * stores delta values starting from the 2nd value written(1st value is stored in header).
-   * It's reused between flushes
-   */
-  private int[] deltaBlockBuffer;
-
-  /**
-   * bytes buffer for a mini block, it is reused for each mini block.
-   * Therefore the size of biggest miniblock with bitwith of MAX_BITWITH is allocated
-   */
-  private byte[] miniBlockByteBuffer;
-
-  /**
-   * firstValue is written to the header of the page
-   */
-  private int firstValue = 0;
-
-  /**
-   * cache previous written value for calculating delta
-   */
-  private int previousValue = 0;
-
-  /**
-   * min delta is written to the beginning of each block.
-   * it's zig-zag encoded. The deltas stored in each block is actually the difference to min delta,
-   * therefore are all positive
-   * it will be reset after each flush
-   */
-  private int minDeltaInCurrentBlock = Integer.MAX_VALUE;
-
-  public DeltaBinaryPackingValuesWriter(int slabSize, int pageSize) {
-    this(DEFAULT_NUM_BLOCK_VALUES, DEFAULT_NUM_MINIBLOCKS, slabSize, pageSize);
-  }
-
-  public DeltaBinaryPackingValuesWriter(int blockSizeInValues, int miniBlockNum, int slabSize, int pageSize) {
-    this.config = new DeltaBinaryPackingConfig(blockSizeInValues, miniBlockNum);
-    bitWidths = new int[config.miniBlockNumInABlock];
-    deltaBlockBuffer = new int[blockSizeInValues];
-    miniBlockByteBuffer = new byte[config.miniBlockSizeInValues * MAX_BITWIDTH];
-    baos = new CapacityByteArrayOutputStream(slabSize, pageSize);
-  }
-
-  @Override
-  public long getBufferedSize() {
-    return baos.size();
-  }
-
-  @Override
-  public void writeInteger(int v) {
-    totalValueCount++;
-
-    if (totalValueCount == 1) {
-      firstValue = v;
-      previousValue = firstValue;
-      return;
-    }
-
-    int delta = v - previousValue;//calculate delta
-    previousValue = v;
-
-    deltaBlockBuffer[deltaValuesToFlush++] = delta;
-
-    if (delta < minDeltaInCurrentBlock) {
-      minDeltaInCurrentBlock = delta;
-    }
-
-    if (config.blockSizeInValues == deltaValuesToFlush) {
-      flushBlockBuffer();
-    }
-  }
-
-  private void flushBlockBuffer() {
-    //since we store the min delta, the deltas will be converted to be the difference to min delta and all positive
-    for (int i = 0; i < deltaValuesToFlush; i++) {
-      deltaBlockBuffer[i] = deltaBlockBuffer[i] - minDeltaInCurrentBlock;
-    }
-
-    writeMinDelta();
-    int miniBlocksToFlush = getMiniBlockCountToFlush(deltaValuesToFlush);
-
-    calculateBitWidthsForDeltaBlockBuffer(miniBlocksToFlush);
-    for (int i = 0; i < config.miniBlockNumInABlock; i++) {
-      writeBitWidthForMiniBlock(i);
-    }
-
-    for (int i = 0; i < miniBlocksToFlush; i++) {
-      //writing i th miniblock
-      int currentBitWidth = bitWidths[i];
-      BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(currentBitWidth);
-      int miniBlockStart = i * config.miniBlockSizeInValues;
-      for (int j = miniBlockStart; j < (i + 1) * config.miniBlockSizeInValues; j += 8) {//8 values per pack
-        // mini block is atomic in terms of flushing
-        // This may write more values when reach to the end of data writing to last mini block,
-        // since it may not be aligend to miniblock,
-        // but doesnt matter. The reader uses total count to see if reached the end.
-        packer.pack8Values(deltaBlockBuffer, j, miniBlockByteBuffer, 0);
-        baos.write(miniBlockByteBuffer, 0, currentBitWidth);
-      }
-    }
-
-    minDeltaInCurrentBlock = Integer.MAX_VALUE;
-    deltaValuesToFlush = 0;
-  }
-
-  private void writeBitWidthForMiniBlock(int i) {
-    try {
-      BytesUtils.writeIntLittleEndianOnOneByte(baos, bitWidths[i]);
-    } catch (IOException e) {
-      throw new ParquetEncodingException("can not write bitwith for miniblock", e);
-    }
-  }
-
-  private void writeMinDelta() {
-    try {
-      BytesUtils.writeZigZagVarInt(minDeltaInCurrentBlock, baos);
-    } catch (IOException e) {
-      throw new ParquetEncodingException("can not write min delta for block", e);
-    }
-  }
-
-  /**
-   * iterate through values in each mini block and calculate the bitWidths of max values.
-   *
-   * @param miniBlocksToFlush
-   */
-  private void calculateBitWidthsForDeltaBlockBuffer(int miniBlocksToFlush) {
-    for (int miniBlockIndex = 0; miniBlockIndex < miniBlocksToFlush; miniBlockIndex++) {
-
-      int mask = 0;
-      int miniStart = miniBlockIndex * config.miniBlockSizeInValues;
-
-      //The end of current mini block could be the end of current block(deltaValuesToFlush) buffer when data is not aligned to mini block
-      int miniEnd = Math.min((miniBlockIndex + 1) * config.miniBlockSizeInValues, deltaValuesToFlush);
-
-      for (int i = miniStart; i < miniEnd; i++) {
-        mask |= deltaBlockBuffer[i];
-      }
-      bitWidths[miniBlockIndex] = 32 - Integer.numberOfLeadingZeros(mask);
-    }
-  }
-
-  private int getMiniBlockCountToFlush(double numberCount) {
-    return (int) Math.ceil(numberCount / config.miniBlockSizeInValues);
-  }
-
-  /**
-   * getBytes will trigger flushing block buffer, DO NOT write after getBytes() is called without calling reset()
-   *
-   * @return
-   */
-  @Override
-  public BytesInput getBytes() {
-    //The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount
-    if (deltaValuesToFlush != 0) {
-      flushBlockBuffer();
-    }
-    return BytesInput.concat(
-            config.toBytesInput(),
-            BytesInput.fromUnsignedVarInt(totalValueCount),
-            BytesInput.fromZigZagVarInt(firstValue),
-            BytesInput.from(baos));
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return Encoding.DELTA_BINARY_PACKED;
-  }
-
-  @Override
-  public void reset() {
-    this.totalValueCount = 0;
-    this.baos.reset();
-    this.deltaValuesToFlush = 0;
-    this.minDeltaInCurrentBlock = Integer.MAX_VALUE;
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    return baos.getCapacity();
-  }
-
-  @Override
-  public String memUsageString(String prefix) {
-    return String.format("%s DeltaBinaryPacking %d bytes", prefix, getAllocatedSize());
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java b/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java
deleted file mode 100644
index 8fb67a1..0000000
--- a/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.deltalengthbytearray;
-
-import static parquet.Log.DEBUG;
-
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.column.values.ValuesReader;
-import parquet.column.values.delta.DeltaBinaryPackingValuesReader;
-import parquet.io.api.Binary;
-
-/**
- * Reads binary data written by {@link DeltaLengthByteArrayValueWriter}
- *
- * @author Aniket Mokashi
- *
- */
-public class DeltaLengthByteArrayValuesReader extends ValuesReader {
-
-  private static final Log LOG = Log.getLog(DeltaLengthByteArrayValuesReader.class);
-  private ValuesReader lengthReader;
-  private byte[] in;
-  private int offset;
-
-  public DeltaLengthByteArrayValuesReader() {
-    this.lengthReader = new DeltaBinaryPackingValuesReader();
-  }
-
-  @Override
-  public void initFromPage(int valueCount, byte[] in, int offset)
-      throws IOException {
-    if (DEBUG) LOG.debug("init from page at offset "+ offset + " for length " + (in.length - offset));
-    lengthReader.initFromPage(valueCount, in, offset);
-    offset = lengthReader.getNextOffset();
-    this.in = in;
-    this.offset = offset;
-  }
-
-  @Override
-  public Binary readBytes() {
-    int length = lengthReader.readInteger();
-    int start = offset;
-    offset = start + length;
-    return Binary.fromByteArray(in, start, length);
-  }
-
-  @Override
-  public void skip() {
-    int length = lengthReader.readInteger();
-    offset = offset + length;
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesWriter.java
deleted file mode 100644
index 3fed3f7..0000000
--- a/parquet-column/src/main/java/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesWriter.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.deltalengthbytearray;
-
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.bytes.BytesInput;
-import parquet.bytes.CapacityByteArrayOutputStream;
-import parquet.bytes.LittleEndianDataOutputStream;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.column.values.delta.DeltaBinaryPackingValuesWriter;
-import parquet.io.ParquetEncodingException;
-import parquet.io.api.Binary;
-
-/**
- * Write lengths of byte-arrays using delta encoding, followed by concatenated byte-arrays
- * <pre>
- *   {@code
- *   delta-length-byte-array : length* byte-array*
- *   }
- * </pre>
- * @author Aniket Mokashi
- *
- */
-public class DeltaLengthByteArrayValuesWriter extends ValuesWriter {
-
-  private static final Log LOG = Log.getLog(DeltaLengthByteArrayValuesWriter.class);
-
-  private ValuesWriter lengthWriter;
-  private CapacityByteArrayOutputStream arrayOut;
-  private LittleEndianDataOutputStream out;
-
-  public DeltaLengthByteArrayValuesWriter(int initialSize, int pageSize) {
-    arrayOut = new CapacityByteArrayOutputStream(initialSize, pageSize);
-    out = new LittleEndianDataOutputStream(arrayOut);
-    lengthWriter = new DeltaBinaryPackingValuesWriter(
-        DeltaBinaryPackingValuesWriter.DEFAULT_NUM_BLOCK_VALUES,
-        DeltaBinaryPackingValuesWriter.DEFAULT_NUM_MINIBLOCKS,
-        initialSize, pageSize);
-  }
-
-  @Override
-  public void writeBytes(Binary v) {
-    try {
-      lengthWriter.writeInteger(v.length());
-      out.write(v.getBytes());
-    } catch (IOException e) {
-      throw new ParquetEncodingException("could not write bytes", e);
-    }
-  }
-
-  @Override
-  public long getBufferedSize() {
-    return lengthWriter.getBufferedSize() + arrayOut.size();
-  }
-
-  @Override
-  public BytesInput getBytes() {
-    try {
-      out.flush();
-    } catch (IOException e) {
-      throw new ParquetEncodingException("could not write page", e);
-    }
-    if (Log.DEBUG) LOG.debug("writing a buffer of size " + arrayOut.size());
-    return BytesInput.concat(lengthWriter.getBytes(), BytesInput.from(arrayOut));
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return Encoding.DELTA_LENGTH_BYTE_ARRAY;
-  }
-
-  @Override
-  public void reset() {
-    lengthWriter.reset();
-    arrayOut.reset();
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    return lengthWriter.getAllocatedSize() + arrayOut.getCapacity();
-  }
-
-  @Override
-  public String memUsageString(String prefix) {
-    return arrayOut.memUsageString(lengthWriter.memUsageString(prefix) + " DELTA_LENGTH_BYTE_ARRAY");
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayReader.java b/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayReader.java
deleted file mode 100644
index 1493f9b..0000000
--- a/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayReader.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.deltastrings;
-
-import java.io.IOException;
-
-import parquet.column.values.ValuesReader;
-import parquet.column.values.delta.DeltaBinaryPackingValuesReader;
-import parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesReader;
-import parquet.io.api.Binary;
-
-/**
- * Reads binary data written by {@link DeltaByteArrayWriter}
- * 
- * @author Aniket Mokashi
- *
- */
-public class DeltaByteArrayReader extends ValuesReader {
-  private ValuesReader prefixLengthReader;
-  private ValuesReader suffixReader;
-
-  private Binary previous;
-
-  public DeltaByteArrayReader() {
-    this.prefixLengthReader = new DeltaBinaryPackingValuesReader();
-    this.suffixReader = new DeltaLengthByteArrayValuesReader();
-    this.previous = Binary.fromByteArray(new byte[0]);
-  }
-
-  @Override
-  public void initFromPage(int valueCount, byte[] page, int offset)
-      throws IOException {
-    prefixLengthReader.initFromPage(valueCount, page, offset);
-    int next = prefixLengthReader.getNextOffset();
-    suffixReader.initFromPage(valueCount, page, next);	
-  }
-
-  @Override
-  public void skip() {
-    prefixLengthReader.skip();
-    suffixReader.skip();
-  }
-
-  @Override
-  public Binary readBytes() {
-    int prefixLength = prefixLengthReader.readInteger();
-    // This does not copy bytes
-    Binary suffix = suffixReader.readBytes();
-    int length = prefixLength + suffix.length();
-    
-    // We have to do this to materialize the output
-    if(prefixLength != 0) {
-      byte[] out = new byte[length];
-      System.arraycopy(previous.getBytes(), 0, out, 0, prefixLength);
-      System.arraycopy(suffix.getBytes(), 0, out, prefixLength, suffix.length());
-      previous =  Binary.fromByteArray(out);
-    } else {
-      previous = suffix;
-    }
-    return previous;
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayWriter.java b/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayWriter.java
deleted file mode 100644
index 0d1200a..0000000
--- a/parquet-column/src/main/java/parquet/column/values/deltastrings/DeltaByteArrayWriter.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.deltastrings;
-
-import parquet.bytes.BytesInput;
-import parquet.column.Encoding;
-import parquet.column.values.ValuesWriter;
-import parquet.column.values.delta.DeltaBinaryPackingValuesWriter;
-import parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesWriter;
-import parquet.io.api.Binary;
-
-/**
- * Write prefix lengths using delta encoding, followed by suffixes with Delta length byte arrays
- * <pre>
- *   {@code
- *   delta-length-byte-array : prefix-length* suffixes*
- *   }
- * </pre>
- * @author Aniket Mokashi
- *
- */
-public class DeltaByteArrayWriter extends ValuesWriter{
-
-  private ValuesWriter prefixLengthWriter;
-  private ValuesWriter suffixWriter;
-  private byte[] previous;
-
-  public DeltaByteArrayWriter(int initialCapacity, int pageSize) {
-    this.prefixLengthWriter = new DeltaBinaryPackingValuesWriter(128, 4, initialCapacity, pageSize);
-    this.suffixWriter = new DeltaLengthByteArrayValuesWriter(initialCapacity, pageSize);
-    this.previous = new byte[0];
-  }
-
-  @Override
-  public long getBufferedSize() {
-    return prefixLengthWriter.getBufferedSize() + suffixWriter.getBufferedSize();
-  }
-
-  @Override
-  public BytesInput getBytes() {
-    return BytesInput.concat(prefixLengthWriter.getBytes(), suffixWriter.getBytes());
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return Encoding.DELTA_BYTE_ARRAY;
-  }
-
-  @Override
-  public void reset() {
-    prefixLengthWriter.reset();
-    suffixWriter.reset();
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    return prefixLengthWriter.getAllocatedSize() + suffixWriter.getAllocatedSize();
-  }
-
-  @Override
-  public String memUsageString(String prefix) {
-    prefix = prefixLengthWriter.memUsageString(prefix);
-    return suffixWriter.memUsageString(prefix + "  DELTA_STRINGS");
-  }
-
-  @Override
-  public void writeBytes(Binary v) {
-    int i = 0;
-    byte[] vb = v.getBytes();
-    int length = previous.length < vb.length ? previous.length : vb.length;
-    for(i = 0; (i < length) && (previous[i] == vb[i]); i++);
-    prefixLengthWriter.writeInteger(i);
-    suffixWriter.writeBytes(Binary.fromByteArray(vb, i, vb.length - i));
-    previous = vb;
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesReader.java b/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesReader.java
deleted file mode 100644
index 1d1bbeb..0000000
--- a/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesReader.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.dictionary;
-
-import static parquet.Log.DEBUG;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-
-import parquet.Log;
-import parquet.bytes.BytesUtils;
-import parquet.column.Dictionary;
-import parquet.column.values.ValuesReader;
-import parquet.column.values.rle.RunLengthBitPackingHybridDecoder;
-import parquet.io.ParquetDecodingException;
-import parquet.io.api.Binary;
-
-/**
- * Reads values that have been dictionary encoded
- *
- * @author Julien Le Dem
- *
- */
-public class DictionaryValuesReader extends ValuesReader {
-  private static final Log LOG = Log.getLog(DictionaryValuesReader.class);
-
-  private ByteArrayInputStream in;
-
-  private Dictionary dictionary;
-
-  private RunLengthBitPackingHybridDecoder decoder;
-
-  public DictionaryValuesReader(Dictionary dictionary) {
-    this.dictionary = dictionary;
-  }
-
-  @Override
-  public void initFromPage(int valueCount, byte[] page, int offset)
-      throws IOException {
-    this.in = new ByteArrayInputStream(page, offset, page.length - offset);
-    if (page.length - offset > 0) {
-      if (DEBUG)
-        LOG.debug("init from page at offset " + offset + " for length " + (page.length - offset));
-      int bitWidth = BytesUtils.readIntLittleEndianOnOneByte(in);
-      if (DEBUG) LOG.debug("bit width " + bitWidth);
-      decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in);
-    } else {
-      decoder = new RunLengthBitPackingHybridDecoder(1, in) {
-        @Override
-        public int readInt() throws IOException {
-          throw new IOException("Attempt to read from empty page");
-        }
-      };
-    }
-  }
-
-  @Override
-  public int readValueDictionaryId() {
-    try {
-      return decoder.readInt();
-    } catch (IOException e) {
-      throw new ParquetDecodingException(e);
-    }
-  }
-
-  @Override
-  public Binary readBytes() {
-    try {
-      return dictionary.decodeToBinary(decoder.readInt());
-    } catch (IOException e) {
-      throw new ParquetDecodingException(e);
-    }
-  }
-
-  @Override
-  public float readFloat() {
-    try {
-      return dictionary.decodeToFloat(decoder.readInt());
-    } catch (IOException e) {
-      throw new ParquetDecodingException(e);
-    }
-  }
-
-  @Override
-  public double readDouble() {
-    try {
-      return dictionary.decodeToDouble(decoder.readInt());
-    } catch (IOException e) {
-      throw new ParquetDecodingException(e);
-    }
-  }
-
-  @Override
-  public int readInteger() {
-    try {
-      return dictionary.decodeToInt(decoder.readInt());
-    } catch (IOException e) {
-      throw new ParquetDecodingException(e);
-    }
-  }
-
-  @Override
-  public long readLong() {
-    try {
-      return dictionary.decodeToLong(decoder.readInt());
-    } catch (IOException e) {
-      throw new ParquetDecodingException(e);
-    }
-  }
-
-  @Override
-  public void skip() {
-    try {
-      decoder.readInt(); // Type does not matter as we are just skipping dictionary keys
-    } catch (IOException e) {
-      throw new ParquetDecodingException(e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesWriter.java b/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesWriter.java
deleted file mode 100644
index c986c79..0000000
--- a/parquet-column/src/main/java/parquet/column/values/dictionary/DictionaryValuesWriter.java
+++ /dev/null
@@ -1,625 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.dictionary;
-
-import static parquet.Log.DEBUG;
-import static parquet.bytes.BytesInput.concat;
-import it.unimi.dsi.fastutil.doubles.Double2IntLinkedOpenHashMap;
-import it.unimi.dsi.fastutil.doubles.Double2IntMap;
-import it.unimi.dsi.fastutil.doubles.DoubleIterator;
-import it.unimi.dsi.fastutil.floats.Float2IntLinkedOpenHashMap;
-import it.unimi.dsi.fastutil.floats.Float2IntMap;
-import it.unimi.dsi.fastutil.floats.FloatIterator;
-import it.unimi.dsi.fastutil.ints.Int2IntLinkedOpenHashMap;
-import it.unimi.dsi.fastutil.ints.Int2IntMap;
-import it.unimi.dsi.fastutil.longs.Long2IntLinkedOpenHashMap;
-import it.unimi.dsi.fastutil.longs.Long2IntMap;
-import it.unimi.dsi.fastutil.longs.LongIterator;
-import it.unimi.dsi.fastutil.objects.Object2IntLinkedOpenHashMap;
-import it.unimi.dsi.fastutil.objects.Object2IntMap;
-import it.unimi.dsi.fastutil.objects.ObjectIterator;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Iterator;
-
-import parquet.Log;
-import parquet.bytes.BytesInput;
-import parquet.bytes.BytesUtils;
-import parquet.bytes.CapacityByteArrayOutputStream;
-import parquet.column.Encoding;
-import parquet.column.page.DictionaryPage;
-import parquet.column.values.RequiresFallback;
-import parquet.column.values.ValuesWriter;
-import parquet.column.values.dictionary.IntList.IntIterator;
-import parquet.column.values.plain.FixedLenByteArrayPlainValuesWriter;
-import parquet.column.values.plain.PlainValuesWriter;
-import parquet.column.values.rle.RunLengthBitPackingHybridEncoder;
-import parquet.io.ParquetEncodingException;
-import parquet.io.api.Binary;
-
-/**
- * Will attempt to encode values using a dictionary and fall back to plain encoding
- *  if the dictionary gets too big
- *
- * @author Julien Le Dem
- *
- */
-public abstract class DictionaryValuesWriter extends ValuesWriter implements RequiresFallback {
-  private static final Log LOG = Log.getLog(DictionaryValuesWriter.class);
-
-  /* max entries allowed for the dictionary will fail over to plain encoding if reached */
-  private static final int MAX_DICTIONARY_ENTRIES = Integer.MAX_VALUE - 1;
-  private static final int MIN_INITIAL_SLAB_SIZE = 64;
-
-  /* encoding to label the data page */
-  private final Encoding encodingForDataPage;
-
-  /* encoding to label the dictionary page */
-  protected final Encoding encodingForDictionaryPage;
-
-  /* maximum size in bytes allowed for the dictionary will fail over to plain encoding if reached */
-  protected final int maxDictionaryByteSize;
-
-  /* will become true if the dictionary becomes too big */
-  protected boolean dictionaryTooBig;
-
-  /* current size in bytes the dictionary will take once serialized */
-  protected int dictionaryByteSize;
-
-  /* size in bytes of the dictionary at the end of last dictionary encoded page (in case the current page falls back to PLAIN) */
-  protected int lastUsedDictionaryByteSize;
-
-  /* size in items of the dictionary at the end of last dictionary encoded page (in case the current page falls back to PLAIN) */
-  protected int lastUsedDictionarySize;
-
-  /* dictionary encoded values */
-  protected IntList encodedValues = new IntList();
-
-  /**
-   * @param maxDictionaryByteSize
-   */
-  protected DictionaryValuesWriter(int maxDictionaryByteSize, Encoding encodingForDataPage, Encoding encodingForDictionaryPage) {
-    this.maxDictionaryByteSize = maxDictionaryByteSize;
-    this.encodingForDataPage = encodingForDataPage;
-    this.encodingForDictionaryPage = encodingForDictionaryPage;
-  }
-
-  protected DictionaryPage dictPage(ValuesWriter dictionaryEncoder) {
-    return new DictionaryPage(dictionaryEncoder.getBytes(), lastUsedDictionarySize, encodingForDictionaryPage);
-  }
-
-  @Override
-  public boolean shouldFallBack() {
-    // if the dictionary reaches the max byte size or the values can not be encoded on 4 bytes anymore.
-    return dictionaryByteSize > maxDictionaryByteSize
-        || getDictionarySize() > MAX_DICTIONARY_ENTRIES;
-  }
-
-  @Override
-  public boolean isCompressionSatisfying(long rawSize, long encodedSize) {
-    return (encodedSize + dictionaryByteSize) < rawSize;
-  }
-
-  @Override
-  public void fallBackAllValuesTo(ValuesWriter writer) {
-    fallBackDictionaryEncodedData(writer);
-    if (lastUsedDictionarySize == 0) {
-      // if we never used the dictionary
-      // we free dictionary encoded data
-      clearDictionaryContent();
-      dictionaryByteSize = 0;
-      encodedValues = new IntList();
-    }
-  }
-
-  abstract protected void fallBackDictionaryEncodedData(ValuesWriter writer);
-
-  @Override
-  public long getBufferedSize() {
-    return encodedValues.size() * 4;
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    // size used in memory
-    return encodedValues.size() * 4 + dictionaryByteSize;
-  }
-
-  @Override
-  public BytesInput getBytes() {
-    int maxDicId = getDictionarySize() - 1;
-    if (DEBUG) LOG.debug("max dic id " + maxDicId);
-    int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
-
-    int initialSlabSize =
-        CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
-
-    RunLengthBitPackingHybridEncoder encoder =
-        new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize);
-    IntIterator iterator = encodedValues.iterator();
-    try {
-      while (iterator.hasNext()) {
-        encoder.writeInt(iterator.next());
-      }
-      // encodes the bit width
-      byte[] bytesHeader = new byte[] { (byte) bitWidth };
-      BytesInput rleEncodedBytes = encoder.toBytes();
-      if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size());
-      BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
-      // remember size of dictionary when we last wrote a page
-      lastUsedDictionarySize = getDictionarySize();
-      lastUsedDictionaryByteSize = dictionaryByteSize;
-      return bytes;
-    } catch (IOException e) {
-      throw new ParquetEncodingException("could not encode the values", e);
-    }
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return encodingForDataPage;
-  }
-
-  @Override
-  public void reset() {
-    encodedValues = new IntList();
-  }
-
-  @Override
-  public void resetDictionary() {
-    lastUsedDictionaryByteSize = 0;
-    lastUsedDictionarySize = 0;
-    dictionaryTooBig = false;
-    clearDictionaryContent();
-  }
-
-  /**
-   * clear/free the underlying dictionary content
-   */
-  protected abstract void clearDictionaryContent();
-
-  /**
-   * @return size in items
-   */
-  protected abstract int getDictionarySize();
-
-  @Override
-  public String memUsageString(String prefix) {
-    return String.format(
-        "%s DictionaryValuesWriter{\n"
-          + "%s\n"
-          + "%s\n"
-        + "%s}\n",
-        prefix,
-        prefix + " dict:" + dictionaryByteSize,
-        prefix + " values:" + String.valueOf(encodedValues.size() * 4),
-        prefix
-        );
-  }
-
-  /**
-   *
-   */
-  public static class PlainBinaryDictionaryValuesWriter extends DictionaryValuesWriter {
-
-    /* type specific dictionary content */
-    protected Object2IntMap<Binary> binaryDictionaryContent = new Object2IntLinkedOpenHashMap<Binary>();
-
-    /**
-     * @param maxDictionaryByteSize
-     */
-    public PlainBinaryDictionaryValuesWriter(int maxDictionaryByteSize, Encoding encodingForDataPage, Encoding encodingForDictionaryPage) {
-      super(maxDictionaryByteSize, encodingForDataPage, encodingForDictionaryPage);
-      binaryDictionaryContent.defaultReturnValue(-1);
-    }
-
-    @Override
-    public void writeBytes(Binary v) {
-      int id = binaryDictionaryContent.getInt(v);
-      if (id == -1) {
-        id = binaryDictionaryContent.size();
-        binaryDictionaryContent.put(copy(v), id);
-        // length as int (4 bytes) + actual bytes
-        dictionaryByteSize += 4 + v.length();
-      }
-      encodedValues.add(id);
-    }
-
-    @Override
-    public DictionaryPage createDictionaryPage() {
-      if (lastUsedDictionarySize > 0) {
-        // return a dictionary only if we actually used it
-        PlainValuesWriter dictionaryEncoder = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize);
-        Iterator<Binary> binaryIterator = binaryDictionaryContent.keySet().iterator();
-        // write only the part of the dict that we used
-        for (int i = 0; i < lastUsedDictionarySize; i++) {
-          Binary entry = binaryIterator.next();
-          dictionaryEncoder.writeBytes(entry);
-        }
-        return dictPage(dictionaryEncoder);
-      }
-      return null;
-    }
-
-    @Override
-    public int getDictionarySize() {
-      return binaryDictionaryContent.size();
-    }
-
-    @Override
-    protected void clearDictionaryContent() {
-      binaryDictionaryContent.clear();
-    }
-
-    @Override
-    public void fallBackDictionaryEncodedData(ValuesWriter writer) {
-      //build reverse dictionary
-      Binary[] reverseDictionary = new Binary[getDictionarySize()];
-      for (Object2IntMap.Entry<Binary> entry : binaryDictionaryContent.object2IntEntrySet()) {
-        reverseDictionary[entry.getIntValue()] = entry.getKey();
-      }
-
-      //fall back to plain encoding
-      IntIterator iterator = encodedValues.iterator();
-      while (iterator.hasNext()) {
-        int id = iterator.next();
-        writer.writeBytes(reverseDictionary[id]);
-      }
-    }
-
-    protected static Binary copy(Binary binary) {
-      return Binary.fromByteArray(
-          Arrays.copyOf(binary.getBytes(), binary.length()));
-    }
-  }
-
-  /**
-   *
-   */
-  public static class PlainFixedLenArrayDictionaryValuesWriter extends PlainBinaryDictionaryValuesWriter {
-
-    private final int length;
-
-    /**
-     * @param maxDictionaryByteSize
-     * @param initialSize
-     */
-    public PlainFixedLenArrayDictionaryValuesWriter(int maxDictionaryByteSize, int length, Encoding encodingForDataPage, Encoding encodingForDictionaryPage) {
-      super(maxDictionaryByteSize, encodingForDataPage, encodingForDictionaryPage);
-      this.length = length;
-    }
-
-    @Override
-    public void writeBytes(Binary value) {
-      int id = binaryDictionaryContent.getInt(value);
-      if (id == -1) {
-        id = binaryDictionaryContent.size();
-        binaryDictionaryContent.put(copy(value), id);
-        dictionaryByteSize += length;
-      }
-      encodedValues.add(id);
-    }
-
-    @Override
-    public DictionaryPage createDictionaryPage() {
-      if (lastUsedDictionarySize > 0) {
-        // return a dictionary only if we actually used it
-        FixedLenByteArrayPlainValuesWriter dictionaryEncoder = new FixedLenByteArrayPlainValuesWriter(length, lastUsedDictionaryByteSize, maxDictionaryByteSize);
-        Iterator<Binary> binaryIterator = binaryDictionaryContent.keySet().iterator();
-        // write only the part of the dict that we used
-        for (int i = 0; i < lastUsedDictionarySize; i++) {
-          Binary entry = binaryIterator.next();
-          dictionaryEncoder.writeBytes(entry);
-        }
-        return dictPage(dictionaryEncoder);
-      }
-      return null;
-    }
-  }
-
-  /**
-   *
-   */
-  public static class PlainLongDictionaryValuesWriter extends DictionaryValuesWriter {
-
-    /* type specific dictionary content */
-    private Long2IntMap longDictionaryContent = new Long2IntLinkedOpenHashMap();
-
-    /**
-     * @param maxDictionaryByteSize
-     * @param initialSize
-     */
-    public PlainLongDictionaryValuesWriter(int maxDictionaryByteSize, Encoding encodingForDataPage, Encoding encodingForDictionaryPage) {
-      super(maxDictionaryByteSize, encodingForDataPage, encodingForDictionaryPage);
-      longDictionaryContent.defaultReturnValue(-1);
-    }
-
-    @Override
-    public void writeLong(long v) {
-      int id = longDictionaryContent.get(v);
-      if (id == -1) {
-        id = longDictionaryContent.size();
-        longDictionaryContent.put(v, id);
-        dictionaryByteSize += 8;
-      }
-      encodedValues.add(id);
-    }
-
-    @Override
-    public DictionaryPage createDictionaryPage() {
-      if (lastUsedDictionarySize > 0) {
-        // return a dictionary only if we actually used it
-        PlainValuesWriter dictionaryEncoder = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize);
-        LongIterator longIterator = longDictionaryContent.keySet().iterator();
-        // write only the part of the dict that we used
-        for (int i = 0; i < lastUsedDictionarySize; i++) {
-          dictionaryEncoder.writeLong(longIterator.nextLong());
-        }
-        return dictPage(dictionaryEncoder);
-      }
-      return null;
-    }
-
-    @Override
-    public int getDictionarySize() {
-      return longDictionaryContent.size();
-    }
-
-    @Override
-    protected void clearDictionaryContent() {
-      longDictionaryContent.clear();
-    }
-
-    @Override
-    public void fallBackDictionaryEncodedData(ValuesWriter writer) {
-      //build reverse dictionary
-      long[] reverseDictionary = new long[getDictionarySize()];
-      ObjectIterator<Long2IntMap.Entry> entryIterator = longDictionaryContent.long2IntEntrySet().iterator();
-      while (entryIterator.hasNext()) {
-        Long2IntMap.Entry entry = entryIterator.next();
-        reverseDictionary[entry.getIntValue()] = entry.getLongKey();
-      }
-
-      //fall back to plain encoding
-      IntIterator iterator = encodedValues.iterator();
-      while (iterator.hasNext()) {
-        int id = iterator.next();
-        writer.writeLong(reverseDictionary[id]);
-      }
-    }
-  }
-
-  /**
-   *
-   */
-  public static class PlainDoubleDictionaryValuesWriter extends DictionaryValuesWriter {
-
-    /* type specific dictionary content */
-    private Double2IntMap doubleDictionaryContent = new Double2IntLinkedOpenHashMap();
-
-    /**
-     * @param maxDictionaryByteSize
-     * @param initialSize
-     */
-    public PlainDoubleDictionaryValuesWriter(int maxDictionaryByteSize, Encoding encodingForDataPage, Encoding encodingForDictionaryPage) {
-      super(maxDictionaryByteSize, encodingForDataPage, encodingForDictionaryPage);
-      doubleDictionaryContent.defaultReturnValue(-1);
-    }
-
-    @Override
-    public void writeDouble(double v) {
-      int id = doubleDictionaryContent.get(v);
-      if (id == -1) {
-        id = doubleDictionaryContent.size();
-        doubleDictionaryContent.put(v, id);
-        dictionaryByteSize += 8;
-      }
-      encodedValues.add(id);
-    }
-
-    @Override
-    public DictionaryPage createDictionaryPage() {
-      if (lastUsedDictionarySize > 0) {
-        // return a dictionary only if we actually used it
-        PlainValuesWriter dictionaryEncoder = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize);
-        DoubleIterator doubleIterator = doubleDictionaryContent.keySet().iterator();
-        // write only the part of the dict that we used
-        for (int i = 0; i < lastUsedDictionarySize; i++) {
-          dictionaryEncoder.writeDouble(doubleIterator.nextDouble());
-        }
-        return dictPage(dictionaryEncoder);
-      }
-      return null;
-    }
-
-    @Override
-    public int getDictionarySize() {
-      return doubleDictionaryContent.size();
-    }
-
-    @Override
-    protected void clearDictionaryContent() {
-      doubleDictionaryContent.clear();
-    }
-
-    @Override
-    public void fallBackDictionaryEncodedData(ValuesWriter writer) {
-      //build reverse dictionary
-      double[] reverseDictionary = new double[getDictionarySize()];
-      ObjectIterator<Double2IntMap.Entry> entryIterator = doubleDictionaryContent.double2IntEntrySet().iterator();
-      while (entryIterator.hasNext()) {
-        Double2IntMap.Entry entry = entryIterator.next();
-        reverseDictionary[entry.getIntValue()] = entry.getDoubleKey();
-      }
-
-      //fall back to plain encoding
-      IntIterator iterator = encodedValues.iterator();
-      while (iterator.hasNext()) {
-        int id = iterator.next();
-        writer.writeDouble(reverseDictionary[id]);
-      }
-    }
-  }
-
-  /**
-   *
-   */
-  public static class PlainIntegerDictionaryValuesWriter extends DictionaryValuesWriter {
-
-    /* type specific dictionary content */
-    private Int2IntMap intDictionaryContent = new Int2IntLinkedOpenHashMap();
-
-    /**
-     * @param maxDictionaryByteSize
-     * @param initialSize
-     */
-    public PlainIntegerDictionaryValuesWriter(int maxDictionaryByteSize, Encoding encodingForDataPage, Encoding encodingForDictionaryPage) {
-      super(maxDictionaryByteSize, encodingForDataPage, encodingForDictionaryPage);
-      intDictionaryContent.defaultReturnValue(-1);
-    }
-
-    @Override
-    public void writeInteger(int v) {
-      int id = intDictionaryContent.get(v);
-      if (id == -1) {
-        id = intDictionaryContent.size();
-        intDictionaryContent.put(v, id);
-        dictionaryByteSize += 4;
-      }
-      encodedValues.add(id);
-    }
-
-    @Override
-    public DictionaryPage createDictionaryPage() {
-      if (lastUsedDictionarySize > 0) {
-        // return a dictionary only if we actually used it
-        PlainValuesWriter dictionaryEncoder = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize);
-        it.unimi.dsi.fastutil.ints.IntIterator intIterator = intDictionaryContent.keySet().iterator();
-        // write only the part of the dict that we used
-        for (int i = 0; i < lastUsedDictionarySize; i++) {
-          dictionaryEncoder.writeInteger(intIterator.nextInt());
-        }
-        return dictPage(dictionaryEncoder);
-      }
-      return null;
-    }
-
-    @Override
-    public int getDictionarySize() {
-      return intDictionaryContent.size();
-    }
-
-    @Override
-    protected void clearDictionaryContent() {
-      intDictionaryContent.clear();
-    }
-
-    @Override
-    public void fallBackDictionaryEncodedData(ValuesWriter writer) {
-      //build reverse dictionary
-      int[] reverseDictionary = new int[getDictionarySize()];
-      ObjectIterator<Int2IntMap.Entry> entryIterator = intDictionaryContent.int2IntEntrySet().iterator();
-      while (entryIterator.hasNext()) {
-        Int2IntMap.Entry entry = entryIterator.next();
-        reverseDictionary[entry.getIntValue()] = entry.getIntKey();
-      }
-
-      //fall back to plain encoding
-      IntIterator iterator = encodedValues.iterator();
-      while (iterator.hasNext()) {
-        int id = iterator.next();
-        writer.writeInteger(reverseDictionary[id]);
-      }
-    }
-  }
-
-  /**
-   *
-   */
-  public static class PlainFloatDictionaryValuesWriter extends DictionaryValuesWriter {
-
-    /* type specific dictionary content */
-    private Float2IntMap floatDictionaryContent = new Float2IntLinkedOpenHashMap();
-
-    /**
-     * @param maxDictionaryByteSize
-     * @param initialSize
-     */
-    public PlainFloatDictionaryValuesWriter(int maxDictionaryByteSize, Encoding encodingForDataPage, Encoding encodingForDictionaryPage) {
-      super(maxDictionaryByteSize, encodingForDataPage, encodingForDictionaryPage);
-      floatDictionaryContent.defaultReturnValue(-1);
-    }
-
-    @Override
-    public void writeFloat(float v) {
-      int id = floatDictionaryContent.get(v);
-      if (id == -1) {
-        id = floatDictionaryContent.size();
-        floatDictionaryContent.put(v, id);
-        dictionaryByteSize += 4;
-      }
-      encodedValues.add(id);
-    }
-
-    @Override
-    public DictionaryPage createDictionaryPage() {
-      if (lastUsedDictionarySize > 0) {
-        // return a dictionary only if we actually used it
-        PlainValuesWriter dictionaryEncoder = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize);
-        FloatIterator floatIterator = floatDictionaryContent.keySet().iterator();
-        // write only the part of the dict that we used
-        for (int i = 0; i < lastUsedDictionarySize; i++) {
-          dictionaryEncoder.writeFloat(floatIterator.nextFloat());
-        }
-        return dictPage(dictionaryEncoder);
-      }
-      return null;
-    }
-
-    @Override
-    public int getDictionarySize() {
-      return floatDictionaryContent.size();
-    }
-
-    @Override
-    protected void clearDictionaryContent() {
-      floatDictionaryContent.clear();
-    }
-
-    @Override
-    public void fallBackDictionaryEncodedData(ValuesWriter writer) {
-      //build reverse dictionary
-      float[] reverseDictionary = new float[getDictionarySize()];
-      ObjectIterator<Float2IntMap.Entry> entryIterator = floatDictionaryContent.float2IntEntrySet().iterator();
-      while (entryIterator.hasNext()) {
-        Float2IntMap.Entry entry = entryIterator.next();
-        reverseDictionary[entry.getIntValue()] = entry.getFloatKey();
-      }
-
-      //fall back to plain encoding
-      IntIterator iterator = encodedValues.iterator();
-      while (iterator.hasNext()) {
-        int id = iterator.next();
-        writer.writeFloat(reverseDictionary[id]);
-      }
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/values/dictionary/IntList.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/values/dictionary/IntList.java b/parquet-column/src/main/java/parquet/column/values/dictionary/IntList.java
deleted file mode 100644
index 40dbd5f..0000000
--- a/parquet-column/src/main/java/parquet/column/values/dictionary/IntList.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column.values.dictionary;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * An append-only integer list that
- * avoids autoboxing and buffer resizing.
- *
- *
- * @author Julien Le Dem
- *
- */
-public class IntList {
-
-  private static final int SLAB_SIZE = 64 * 1024;
-
-  /**
-   * to iterate on the content of the list
-   * not an actual iterator to avoid autoboxing
-   *
-   * @author Julien Le Dem
-   *
-   */
-  public static class IntIterator {
-
-    private final int[][] slabs;
-    private int current;
-    private final int count;
-
-    /**
-     * slabs will be iterated in order up to the provided count
-     * as the last slab may not be full
-     * @param slabs contain the ints
-     * @param count total count of ints
-     */
-    public IntIterator(int[][] slabs, int count) {
-      this.slabs = slabs;
-      this.count = count;
-    }
-
-    /**
- * @return whether there is a next value
-     */
-    public boolean hasNext() {
-      return current < count;
-    }
-
-    /**
-     * @return the next int
-     */
-    public int next() {
-      final int result = slabs[current / SLAB_SIZE][current % SLAB_SIZE];
-      ++ current;
-      return result;
-    }
-
-  }
-
-  private List<int[]> slabs = new ArrayList<int[]>();
-  private int[] currentSlab;
-  private int currentSlabPos;
-
-  /**
-   * construct an empty list
-   */
-  public IntList() {
-    initSlab();
-  }
-
-  private void initSlab() {
-    currentSlab = new int[SLAB_SIZE];
-    currentSlabPos = 0;
-  }
-
-  /**
-   * @param i value to append to the end of the list
-   */
-  public void add(int i) {
-    if (currentSlabPos == currentSlab.length) {
-      slabs.add(currentSlab);
-      initSlab();
-    }
-    currentSlab[currentSlabPos] = i;
-    ++ currentSlabPos;
-  }
-
-  /**
-   * (not an actual Iterable)
-   * @return an IntIterator on the content
-   */
-  public IntIterator iterator() {
-    int[][] itSlabs = slabs.toArray(new int[slabs.size() + 1][]);
-    itSlabs[slabs.size()] = currentSlab;
-    return new IntIterator(itSlabs, SLAB_SIZE * slabs.size() + currentSlabPos);
-  }
-
-  /**
-   * @return the current size of the list
-   */
-  public int size() {
-    return SLAB_SIZE * slabs.size() + currentSlabPos;
-  }
-
-}


Mime
View raw message