parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject parquet-mr git commit: PARQUET-543: Remove unused boundedint package.
Date Sun, 17 Jul 2016 23:27:25 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 626014eaf -> 6a62646bf


PARQUET-543: Remove unused boundedint package.

This relocates the DevNullValuesWriter and ZeroIntegerValuesReader,
which are used but are not related to the boundedint code.

Author: Ryan Blue <blue@apache.org>

Closes #329 from rdblue/PARQUET-543-remove-boundedint and squashes the following commits:

0158c51 [Ryan Blue] PARQUET-543: Update new import in ParquetProperties.
550a1a3 [Ryan Blue] PARQUET-543: Remove unused boundedint package.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/6a62646b
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/6a62646b
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/6a62646b

Branch: refs/heads/master
Commit: 6a62646bfcecec9c0806a216b17e1a4ccb4609aa
Parents: 626014e
Author: Ryan Blue <blue@apache.org>
Authored: Sun Jul 17 16:27:20 2016 -0700
Committer: Ryan Blue <blue@apache.org>
Committed: Sun Jul 17 16:27:20 2016 -0700

----------------------------------------------------------------------
 .../org/apache/parquet/column/Encoding.java     |   2 +-
 .../parquet/column/ParquetProperties.java       |   2 +-
 .../values/bitpacking/DevNullValuesWriter.java  |  89 ++++++++++
 .../column/values/boundedint/BitReader.java     | 124 -------------
 .../column/values/boundedint/BitWriter.java     | 167 ------------------
 .../boundedint/BoundedIntValuesFactory.java     |  33 ----
 .../boundedint/BoundedIntValuesReader.java      |  94 ----------
 .../boundedint/BoundedIntValuesWriter.java      | 165 -----------------
 .../values/boundedint/DevNullValuesWriter.java  |  89 ----------
 .../boundedint/ZeroIntegerValuesReader.java     |  53 ------
 .../values/rle/ZeroIntegerValuesReader.java     |  53 ++++++
 .../values/boundedint/TestBoundedColumns.java   | 175 -------------------
 12 files changed, 144 insertions(+), 902 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java b/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java
index 0a24e76..3f21a4e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java
@@ -31,7 +31,7 @@ import org.apache.parquet.bytes.BytesUtils;
 import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.values.ValuesReader;
 import org.apache.parquet.column.values.bitpacking.ByteBitPackingValuesReader;
-import org.apache.parquet.column.values.boundedint.ZeroIntegerValuesReader;
+import org.apache.parquet.column.values.rle.ZeroIntegerValuesReader;
 import org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader;
 import org.apache.parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesReader;
 import org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader;

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index 9ed7736..e3881f8 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -31,7 +31,7 @@ import org.apache.parquet.column.impl.ColumnWriteStoreV1;
 import org.apache.parquet.column.impl.ColumnWriteStoreV2;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.values.ValuesWriter;
-import org.apache.parquet.column.values.boundedint.DevNullValuesWriter;
+import org.apache.parquet.column.values.bitpacking.DevNullValuesWriter;
 import org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesWriterForInteger;
 import org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesWriterForLong;
 import org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter;

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/DevNullValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/DevNullValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/DevNullValuesWriter.java
new file mode 100644
index 0000000..a6fa1e3
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bitpacking/DevNullValuesWriter.java
@@ -0,0 +1,89 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bitpacking;
+
+import static org.apache.parquet.column.Encoding.BIT_PACKED;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.io.api.Binary;
+
+/**
+ * This is a special writer that doesn't write anything. The idea being that
+ * some columns will always be the same value, and this will capture that. An
+ * example is the set of repetition levels for a schema with no repeated fields.
+ */
+public class DevNullValuesWriter extends ValuesWriter {
+  @Override
+  public long getBufferedSize() {
+    return 0;
+  }
+
+  @Override
+  public void reset() {
+  }
+
+  @Override
+  public void writeInteger(int v) {
+  }
+
+  @Override
+  public void writeByte(int value) {
+  }
+
+  @Override
+  public void writeBoolean(boolean v) {
+  }
+
+  @Override
+  public void writeBytes(Binary v) {
+  }
+
+  @Override
+  public void writeLong(long v) {
+  }
+
+  @Override
+  public void writeDouble(double v) {
+  }
+
+  @Override
+  public void writeFloat(float v) {
+  }
+
+  @Override
+  public BytesInput getBytes() {
+    return BytesInput.empty();
+  }
+
+  @Override
+  public long getAllocatedSize() {
+    return 0;
+  }
+
+  @Override
+  public Encoding getEncoding() {
+    return BIT_PACKED;
+  }
+
+  @Override
+  public String memUsageString(String prefix) {
+    return prefix + "0";
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitReader.java
deleted file mode 100644
index caea5b5..0000000
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitReader.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.parquet.io.ParquetDecodingException;
-
-class BitReader {
-  private int currentByte = 0;
-  private int currentPosition = 8;
-  private ByteBuffer buf;
-  private int currentBufferPosition = 0;
-  private static final int[] byteGetValueMask = new int[8];
-  private static final int[] readMask = new int[32];
-  private int endBufferPosistion;
-
-  static {
-    int currentMask = 1;
-    for (int i = 0; i < byteGetValueMask.length; i++) {
-      byteGetValueMask[i] = currentMask;
-      currentMask <<= 1;
-    }
-    currentMask = 0;
-    for (int i = 0; i < readMask.length; i++) {
-      readMask[i] = currentMask;
-      currentMask <<= 1;
-      currentMask += 1;
-    }
-  }
-
-  /**
-   * Prepare to deserialize bit-packed integers from the given array.
-   * The array is not copied, so must not be mutated during the course of
-   * reading.
-   */
-  public void prepare(ByteBuffer buf, int offset, int length) {
-    this.buf = buf;
-    this.endBufferPosistion = offset + length;
-    currentByte = 0;
-    currentPosition = 8;
-    currentBufferPosition = offset;
-  }
-
-  /**
-   * Extract the given bit index from the given value.
-   */
-  private static boolean extractBit(int val, int bit) {
-    return (val & byteGetValueMask[bit]) != 0;
-  }
-
-  /**
-   * Read an integer from the stream which is represented by a specified
-   * number of bits.
-   * @param bitsPerValue the number of bits used to represent the integer
-   */
-  public int readNBitInteger(int bitsPerValue) {
-    int bits = bitsPerValue + currentPosition;
-    int currentValue = currentByte >>> currentPosition;
-    int toShift = 8 - currentPosition;
-    while (bits >= 8) {
-      currentByte = getNextByte();
-      currentValue |= currentByte << toShift;
-      toShift += 8;
-      bits -= 8;
-    }
-    currentValue &= readMask[bitsPerValue];
-    currentPosition = (bitsPerValue + currentPosition) % 8;
-    return currentValue;
-  }
-
-  private int getNextByte() {
-    if (currentBufferPosition < endBufferPosistion) {
-      return buf.get(currentBufferPosition++) & 0xFF;
-    }
-    return 0;
-  }
-
-  public boolean readBit() throws IOException {
-    if (currentPosition == 8) {
-      currentByte = getNextByte();
-      currentPosition = 0;
-    }
-    return extractBit(currentByte, currentPosition++);
-  }
-
-  public int readByte() {
-    currentByte |= (getNextByte() << 8);
-    int value = (currentByte >>> currentPosition) & 0xFF;
-    currentByte >>>= 8;
-    return value;
-  }
-
-  public int readUnsignedVarint() throws IOException {
-    int value = 0;
-    int i = 0;
-    int b;
-    while (((b = readByte()) & 0x80) != 0) {
-        value |= (b & 0x7F) << i;
-        i += 7;
-        if (i > 35) {
-            throw new ParquetDecodingException("Variable length quantity is too long");
-        }
-    }
-    return value | (b << i);
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitWriter.java
deleted file mode 100644
index 9489714..0000000
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BitWriter.java
+++ /dev/null
@@ -1,167 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import org.apache.parquet.bytes.ByteBufferAllocator;
-import org.apache.parquet.Log;
-import org.apache.parquet.bytes.BytesInput;
-import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
-
-class BitWriter {
-  private static final Log LOG = Log.getLog(BitWriter.class);
-  private static final boolean DEBUG = false;//Log.DEBUG;
-
-  private CapacityByteArrayOutputStream baos;
-  private int currentByte = 0;
-  private int currentBytePosition = 0;
-  private static final int[] byteToTrueMask = new int[8];
-  private static final int[] byteToFalseMask = new int[8];
-  private boolean finished = false;
-  static {
-    int currentMask = 1;
-    for (int i = 0; i < byteToTrueMask.length; i++) {
-      byteToTrueMask[i] = currentMask;
-      byteToFalseMask[i] = ~currentMask;
-      currentMask <<= 1;
-    }
-  }
-
-  public BitWriter(int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
-    this.baos = new CapacityByteArrayOutputStream(initialCapacity, pageSize, allocator);
-  }
-
-  public void writeBit(boolean bit) {
-    if (DEBUG) LOG.debug("writing: " + (bit ? "1" : "0"));
-    currentByte = setBytePosition(currentByte, currentBytePosition++, bit);
-    if (currentBytePosition == 8) {
-      baos.write(currentByte);
-      if (DEBUG) LOG.debug("to buffer: " + toBinary(currentByte));
-      currentByte = 0;
-      currentBytePosition = 0;
-    }
-  }
-
-  public void writeByte(int val) {
-    if (DEBUG) LOG.debug("writing: " + toBinary(val) + " (" + val + ")");
-    currentByte |= ((val & 0xFF) << currentBytePosition);
-    baos.write(currentByte);
-    if (DEBUG) LOG.debug("to buffer: " + toBinary(currentByte));
-    currentByte >>>= 8;
-  }
-
-  /**
-   * Write the given integer, serialized using the given number of bits.
-   * It is assumed that the integer can be correctly serialized within
-   * the provided bit size.
-   * @param val the value to serialize
-   * @param bitsToWrite the number of bits to use
-   */
-  public void writeNBitInteger(int val, int bitsToWrite) {
-    if (DEBUG) LOG.debug("writing: " + toBinary(val, bitsToWrite) + " (" + val + ")");
-    val <<= currentBytePosition;
-    int upperByte = currentBytePosition + bitsToWrite;
-    currentByte |= val;
-    while (upperByte >= 8) {
-      baos.write(currentByte); //this only writes the lowest byte
-      if (DEBUG) LOG.debug("to buffer: " + toBinary(currentByte));
-      upperByte -= 8;
-      currentByte >>>= 8;
-    }
-    currentBytePosition = (currentBytePosition + bitsToWrite) % 8;
-  }
-
-  private String toBinary(int val, int alignTo) {
-    String result = Integer.toBinaryString(val);
-    while (result.length() < alignTo) {
-      result = "0" + result;
-    }
-    return result;
-  }
-
-  private String toBinary(int val) {
-    return toBinary(val, 8);
-  }
-
-  public BytesInput finish() {
-    if (!finished) {
-      if (currentBytePosition > 0) {
-        baos.write(currentByte);
-        if (DEBUG) LOG.debug("to buffer: " + toBinary(currentByte));
-      }
-    }
-    finished = true;
-    return BytesInput.from(baos);
-  }
-
-  public void reset() {
-    baos.reset();
-    currentByte = 0;
-    currentBytePosition = 0;
-    finished = false;
-  }
-
-  /**
-   * Set or clear the given bit position in the given byte.
-   * @param currentByte the byte to mutate
-   * @param bitOffset the bit to set or clear
-   * @param newBitValue whether to set or clear the bit
-   * @return the mutated byte
-   */
-  private static int setBytePosition(int currentByte, int bitOffset, boolean newBitValue) {
-    if (newBitValue) {
-      currentByte |= byteToTrueMask[bitOffset];
-    } else {
-      currentByte &= byteToFalseMask[bitOffset];
-    }
-    return currentByte;
-  }
-
-  //This assumes you will never give it a negative value
-  public void writeUnsignedVarint(int value) {
-    while ((value & 0xFFFFFF80) != 0L) {
-      writeByte((value & 0x7F) | 0x80);
-      value >>>= 7;
-    }
-    writeByte(value & 0x7F);
-  }
-
-  public int getMemSize() {
-    // baos = 8 bytes
-    // currentByte + currentBytePosition = 8 bytes
-    // the size of baos:
-    //   count : 4 bytes (rounded to 8)
-    //   buf : 12 bytes (8 ptr + 4 length) should technically be rounded to 8 depending on buffer size
-    return 32 + (int)baos.size();
-  }
-
-  public int getCapacity() {
-    return baos.getCapacity();
-  }
-
-  public String memUsageString(String prefix) {
-    return baos.memUsageString(prefix);
-  }
-
-  public void close() {
-    currentByte = 0;
-    currentBytePosition = 0;
-    finished = false;
-    baos.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesFactory.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesFactory.java b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesFactory.java
deleted file mode 100644
index bbbf8da..0000000
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesFactory.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import org.apache.parquet.bytes.ByteBufferAllocator;
-import org.apache.parquet.column.values.ValuesReader;
-import org.apache.parquet.column.values.ValuesWriter;
-
-public abstract class BoundedIntValuesFactory {
-  public static ValuesReader getBoundedReader(int bound) {
-    return bound == 0 ? new ZeroIntegerValuesReader() : new BoundedIntValuesReader(bound);
-  }
-
-  public static ValuesWriter getBoundedWriter(int bound, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
-    return bound == 0 ? new DevNullValuesWriter() : new BoundedIntValuesWriter(bound, initialCapacity, pageSize, allocator);
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesReader.java
deleted file mode 100644
index c322125..0000000
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesReader.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import static org.apache.parquet.Log.DEBUG;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.parquet.Log;
-import org.apache.parquet.bytes.BytesUtils;
-import org.apache.parquet.column.values.ValuesReader;
-import org.apache.parquet.io.ParquetDecodingException;
-
-/**
- * @see BoundedIntValuesWriter
- */
-class BoundedIntValuesReader extends ValuesReader {
-  private static final Log LOG = Log.getLog(BoundedIntValuesReader.class);
-
-  private int currentValueCt = 0;
-  private int currentValue = 0;
-  private final int bitsPerValue;
-  private BitReader bitReader = new BitReader();
-  private int nextOffset;
-
-  public BoundedIntValuesReader(int bound) {
-    if (bound == 0) {
-      throw new ParquetDecodingException("Value bound cannot be 0. Use DevNullColumnReader instead.");
-    }
-    bitsPerValue = BytesUtils.getWidthFromMaxInt(bound);
-  }
-
-  @Override
-  public int readInteger() {
-    try {
-      if (currentValueCt > 0) {
-        currentValueCt--;
-        return currentValue;
-      }
-      if (bitReader.readBit()) {
-        currentValue = bitReader.readNBitInteger(bitsPerValue);
-        currentValueCt = bitReader.readUnsignedVarint() - 1;
-      } else {
-        currentValue = bitReader.readNBitInteger(bitsPerValue);
-      }
-      return currentValue;
-    } catch (IOException e) {
-      throw new ParquetDecodingException("could not read int", e);
-    }
-  }
-
-  // This forces it to deserialize into memory. If it wanted
-  // to, it could just read the bytes (though that number of
-  // bytes would have to be serialized). This is the flip-side
-  // to BoundedIntColumnWriter.writeData(BytesOutput)
-  @Override
-  public void initFromPage(int valueCount, ByteBuffer in, int offset) throws IOException {
-    if (DEBUG) LOG.debug("reading size at "+ offset + ": " + in.get(offset) + " " + in.get(offset + 1) + " " + in.get(offset + 2) + " " + in.get(offset + 3) + " ");
-    int totalBytes = BytesUtils.readIntLittleEndian(in, offset);
-    if (DEBUG) LOG.debug("will read "+ totalBytes + " bytes");
-    currentValueCt = 0;
-    currentValue = 0;
-    bitReader.prepare(in, offset + 4, totalBytes);
-    if (DEBUG) LOG.debug("will read next from " + (offset + totalBytes + 4));
-    this.nextOffset = offset + totalBytes + 4;
-  }
-  
-  @Override
-  public int getNextOffset() {
-    return this.nextOffset;
-  }
-
-  @Override
-  public void skip() {
-    readInteger();
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesWriter.java
deleted file mode 100644
index a90a6e5..0000000
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/BoundedIntValuesWriter.java
+++ /dev/null
@@ -1,165 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import org.apache.parquet.bytes.ByteBufferAllocator;
-import static org.apache.parquet.bytes.BytesInput.concat;
-import static org.apache.parquet.column.Encoding.RLE;
-import org.apache.parquet.Log;
-import org.apache.parquet.bytes.BytesInput;
-import org.apache.parquet.column.Encoding;
-import org.apache.parquet.column.values.ValuesWriter;
-import org.apache.parquet.column.values.bitpacking.BitPackingValuesWriter;
-import org.apache.parquet.io.ParquetEncodingException;
-
-/**
- * This is a special ColumnWriter for the case when you need to write
- * integers in a known range. This is intended primarily for use with
- * repetition and definition levels, since the maximum value that will
- * be written is known a priori based on the schema. Assumption is that
- * the values written are between 0 and the bound, inclusive.
- *
- * This differs from {@link BitPackingValuesWriter} in that this also performs
- * run-length encoding of the data, so is useful when long runs of repeated
- * values are expected.
- */
-class BoundedIntValuesWriter extends ValuesWriter {
-  private static final Log LOG = Log.getLog(BoundedIntValuesWriter.class);
-
-  private int currentValue = -1;
-  private int currentValueCt = -1;
-  private boolean currentValueIsRepeated = false;
-  private boolean thereIsABufferedValue = false;
-  private int shouldRepeatThreshold = 0;
-  private int bitsPerValue;
-  private BitWriter bitWriter;
-  private boolean isFirst = true;
-
-  private static final int[] byteToTrueMask = new int[8];
-  static {
-    int currentMask = 1;
-    for (int i = 0; i < byteToTrueMask.length; i++) {
-      byteToTrueMask[i] = currentMask;
-      currentMask <<= 1;
-    }
-  }
-
-  public BoundedIntValuesWriter(int bound, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
-    if (bound == 0) {
-      throw new ParquetEncodingException("Value bound cannot be 0. Use DevNullColumnWriter instead.");
-    }
-    this.bitWriter = new BitWriter(initialCapacity, pageSize, allocator);
-    bitsPerValue = (int)Math.ceil(Math.log(bound + 1)/Math.log(2));
-    shouldRepeatThreshold = (bitsPerValue + 9)/(1 + bitsPerValue);
-    if (Log.DEBUG) LOG.debug("init column with bit width of " + bitsPerValue + " and repeat threshold of " + shouldRepeatThreshold);
-  }
-
-  @Override
-  public long getBufferedSize() {
-    // currentValue + currentValueCt = 8 bytes
-    // shouldRepeatThreshold + bitsPerValue = 8 bytes
-    // bitWriter = 8 bytes
-    // currentValueIsRepeated + isFirst = 2 bytes (rounded to 8 b/c of word boundaries)
-    return 32 + (bitWriter == null ? 0 : bitWriter.getMemSize());
-  }
-
-  // This assumes that the full state must be serialized, since there is no close method
-  @Override
-  public BytesInput getBytes() {
-    serializeCurrentValue();
-    BytesInput buf = bitWriter.finish();
-    if (Log.DEBUG) LOG.debug("writing a buffer of size " + buf.size() + " + 4 bytes");
-    // We serialize the length so that on deserialization we can
-    // deserialize as we go, instead of having to load everything
-    // into memory
-    return concat(BytesInput.fromInt((int)buf.size()), buf);
-  }
-
-  @Override
-  public void reset() {
-    currentValue = -1;
-    currentValueCt = -1;
-    currentValueIsRepeated = false;
-    thereIsABufferedValue = false;
-    isFirst = true;
-    bitWriter.reset();
-  }
-
-  @Override
-  public void close() {
-    bitWriter.close();
-  }
-
-  @Override
-  public void writeInteger(int val) {
-    if (currentValue == val) {
-      currentValueCt++;
-      if (!currentValueIsRepeated && currentValueCt >= shouldRepeatThreshold) {
-        currentValueIsRepeated = true;
-      }
-    } else {
-      if (!isFirst) {
-        serializeCurrentValue();
-      } else {
-        isFirst = false;
-      }
-
-      newCurrentValue(val);
-    }
-  }
-
-  private void serializeCurrentValue() {
-    if (thereIsABufferedValue) {
-      if (currentValueIsRepeated) {
-        bitWriter.writeBit(true);
-        bitWriter.writeNBitInteger(currentValue, bitsPerValue);
-        bitWriter.writeUnsignedVarint(currentValueCt);
-      } else {
-        for (int i = 0; i < currentValueCt; i++) {
-          bitWriter.writeBit(false);
-          bitWriter.writeNBitInteger(currentValue, bitsPerValue);
-        }
-      }
-    }
-    thereIsABufferedValue = false;
-  }
-
-  private void newCurrentValue(int val) {
-    currentValue = val;
-    currentValueCt = 1;
-    currentValueIsRepeated = false;
-    thereIsABufferedValue = true;
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    return bitWriter.getCapacity();
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return RLE;
-  }
-
-  @Override
-  public String memUsageString(String prefix) {
-    return bitWriter.memUsageString(prefix);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/DevNullValuesWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/DevNullValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/DevNullValuesWriter.java
deleted file mode 100644
index af92941..0000000
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/DevNullValuesWriter.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import static org.apache.parquet.column.Encoding.BIT_PACKED;
-import org.apache.parquet.bytes.BytesInput;
-import org.apache.parquet.column.Encoding;
-import org.apache.parquet.column.values.ValuesWriter;
-import org.apache.parquet.io.api.Binary;
-
-/**
- * This is a special writer that doesn't write anything. The idea being that
- * some columns will always be the same value, and this will capture that. An
- * example is the set of repetition levels for a schema with no repeated fields.
- */
-public class DevNullValuesWriter extends ValuesWriter {
-  @Override
-  public long getBufferedSize() {
-    return 0;
-  }
-
-  @Override
-  public void reset() {
-  }
-
-  @Override
-  public void writeInteger(int v) {
-  }
-
-  @Override
-  public void writeByte(int value) {
-  }
-
-  @Override
-  public void writeBoolean(boolean v) {
-  }
-
-  @Override
-  public void writeBytes(Binary v) {
-  }
-
-  @Override
-  public void writeLong(long v) {
-  }
-
-  @Override
-  public void writeDouble(double v) {
-  }
-
-  @Override
-  public void writeFloat(float v) {
-  }
-
-  @Override
-  public BytesInput getBytes() {
-    return BytesInput.empty();
-  }
-
-  @Override
-  public long getAllocatedSize() {
-    return 0;
-  }
-
-  @Override
-  public Encoding getEncoding() {
-    return BIT_PACKED;
-  }
-
-  @Override
-  public String memUsageString(String prefix) {
-    return prefix + "0";
-  }
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/ZeroIntegerValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/ZeroIntegerValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/ZeroIntegerValuesReader.java
deleted file mode 100644
index 8c78c38..0000000
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/boundedint/ZeroIntegerValuesReader.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.parquet.column.values.ValuesReader;
-
-/**
- * ColumnReader which does not read any actual data, but rather simply produces
- * an endless stream of constant values.
- * Mainly used to read definition levels when the only possible value is 0
- */
-public class ZeroIntegerValuesReader extends ValuesReader {
-  
-  private int nextOffset;
-
-  public int readInteger() {
-    return 0;
-  }
-
-  @Override
-  public void initFromPage(int valueCount, ByteBuffer in, int offset) throws IOException {
-    this.nextOffset = offset;
-  }
-  
-  @Override
-  public int getNextOffset() {
-    return nextOffset;
-  }
-
-  @Override
-  public void skip() {
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java
new file mode 100644
index 0000000..f8ff8d0
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java
@@ -0,0 +1,53 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.rle;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.parquet.column.values.ValuesReader;
+
+/**
+ * ColumnReader which does not read any actual data, but rather simply produces
+ * an endless stream of constant values.
+ * Mainly used to read definition levels when the only possible value is 0
+ */
+public class ZeroIntegerValuesReader extends ValuesReader {
+  
+  private int nextOffset;
+
+  public int readInteger() {
+    return 0;
+  }
+
+  @Override
+  public void initFromPage(int valueCount, ByteBuffer in, int offset) throws IOException {
+    this.nextOffset = offset;
+  }
+  
+  @Override
+  public int getNextOffset() {
+    return nextOffset;
+  }
+
+  @Override
+  public void skip() {
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6a62646b/parquet-column/src/test/java/org/apache/parquet/column/values/boundedint/TestBoundedColumns.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/boundedint/TestBoundedColumns.java b/parquet-column/src/test/java/org/apache/parquet/column/values/boundedint/TestBoundedColumns.java
deleted file mode 100644
index d1e43d2..0000000
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/boundedint/TestBoundedColumns.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.column.values.boundedint;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import java.util.Random;
-
-import org.junit.Test;
-
-import org.apache.parquet.bytes.DirectByteBufferAllocator;
-
-public class TestBoundedColumns {
-  private final Random r = new Random(42L);
-
-  @Test
-  public void testWriterRepeatNoRepeatAndRepeatUnderThreshold() throws IOException {
-    int[] ints = {
-        1, 1, 1, 1,
-        0,
-        0,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 16 2s
-        1,
-        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 // 24 5s
-        };
-    String[] result = {"1",b(1,3),b(4),"0",b(0,3),"0",b(0,3),"1",b(2,3),b(16),"0",b(1,3),"1",b(5,3),b(24)};
-    compareOutput(7, ints, result);
-  }
-
-  @Test
-  public void testWriterNoRepeat() throws IOException {
-    int bound = 7;
-    int[] ints = { 0, 1, 2, 3, 4, 5, 6, 7};
-    String[] result = {"0",b(0,3),"0",b(1,3),"0",b(2,3),"0",b(3,3),"0",b(4,3),"0",b(5,3),"0",b(6,3),"0",b(7,3)};
-    compareOutput(bound, ints, result);
-  }
-
-  private void compareOutput(int bound, int[] ints, String[] result) throws IOException {
-    BoundedIntValuesWriter bicw = new BoundedIntValuesWriter(bound, 64*1024, 64*1024, new DirectByteBufferAllocator());
-    for (int i : ints) {
-      bicw.writeInteger(i);
-    }
-    System.out.println(Arrays.toString(ints));
-    System.out.println(Arrays.toString(result));
-    byte[] byteArray = bicw.getBytes().toByteArray();
-    assertEquals(concat(result), toBinaryString(byteArray, 4));
-    BoundedIntValuesReader bicr = new BoundedIntValuesReader(bound);
-    bicr.initFromPage(1, ByteBuffer.wrap(byteArray), 0);
-    String expected = "";
-    String got = "";
-    for (int i : ints) {
-      expected += " " + i;
-      got += " " + bicr.readInteger();
-    }
-    assertEquals(expected, got);
-  }
-
-  private String concat(String[] result) {
-    String r = "";
-    for (String string : result) {
-      r = string + r;
-    }
-    return r;
-  }
-
-  private String b(int i) {
-    return b(i,8);
-  }
-
-  private String b(int i, int size) {
-    String binaryString = Integer.toBinaryString(i);
-    while (binaryString.length() < size) {
-      binaryString = "0" + binaryString;
-    }
-    return binaryString;
-  }
-
-  public static String toBinaryString(byte[] bytes) {
-    return toBinaryString(bytes, 0);
-  }
-
-  private static String toBinaryString(byte[] bytes, int offset) {
-    String result = "";
-    for (int i = offset; i < bytes.length; i++) {
-      int b = bytes[i] < 0 ? 256 + bytes[i] : bytes[i];
-      String binaryString = Integer.toBinaryString(b);
-      while (binaryString.length() < 8) {
-        binaryString = "0" + binaryString;
-      }
-      result = binaryString + result;
-    }
-    return result;
-  }
-
-  @Test
-  public void testSerDe() throws Exception {
-    int[] valuesPerStripe = new int[] { 50, 100, 700, 1, 200 };
-    int totalValuesInStream = 0;
-    for (int v : valuesPerStripe) {
-      totalValuesInStream += v * 2;
-    }
-
-    for (int bound = 1; bound < 8; bound++) {
-      System.out.println("bound: "+ bound);
-      ByteArrayOutputStream tmp = new ByteArrayOutputStream();
-
-      int[] stream = new int[totalValuesInStream];
-      BoundedIntValuesWriter bicw = new BoundedIntValuesWriter(bound, 64 * 1024, 64*1024, new DirectByteBufferAllocator());
-      int idx = 0;
-      for (int stripeNum = 0; stripeNum < valuesPerStripe.length; stripeNum++) {
-        int next = 0;
-        for (int i = 0; i < valuesPerStripe[stripeNum]; i++) {
-          int temp = r.nextInt(bound + 1);
-          while (next == temp) {
-            temp = r.nextInt(bound + 1);
-          }
-          next = temp;
-          stream[idx++] = next;
-          int ct;
-          if (r.nextBoolean()) {
-            stream[idx++] = ct = r.nextInt(1000) + 1;
-          } else {
-            stream[idx++] = ct = 1;
-          }
-          for (int j = 0; j < ct; j++) {
-            bicw.writeInteger(next);
-          }
-        }
-        bicw.getBytes().writeAllTo(tmp);
-        bicw.reset();
-      }
-      tmp.close();
-
-      byte[] input = tmp.toByteArray();
-
-      BoundedIntValuesReader bicr = new BoundedIntValuesReader(bound);
-      idx = 0;
-      int offset = 0;
-      for (int stripeNum = 0; stripeNum < valuesPerStripe.length; stripeNum++) {
-        bicr.initFromPage(1, ByteBuffer.wrap(input), offset);
-        offset = bicr.getNextOffset();
-        for (int i = 0; i < valuesPerStripe[stripeNum]; i++) {
-          int number = stream[idx++];
-          int ct = stream[idx++];
-          assertTrue(number <= bound);
-          assertTrue(ct > 0);
-          for (int j = 0; j < ct; j++) {
-            assertEquals("Failed on bound ["+bound+"], stripe ["+stripeNum+"], iteration ["+i+"], on count ["+ct+"]", number, bicr.readInteger());
-          }
-        }
-      }
-    }
-  }
-}


Mime
View raw message