parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [1/2] parquet-mr git commit: PARQUET-225: Add support for INT64 delta encoding.
Date Thu, 21 Apr 2016 18:43:11 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master 741944332 -> 8bcfe6c55


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/8bcfe6c5/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBitPacking.java
----------------------------------------------------------------------
diff --git a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBitPacking.java b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBitPacking.java
index 8df5f39..b7dc26b 100644
--- a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBitPacking.java
+++ b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBitPacking.java
@@ -22,10 +22,10 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.Random;
 
 import org.junit.Assert;
 import org.junit.Test;
-
 import org.apache.parquet.Log;
 import org.apache.parquet.column.values.bitpacking.BitPacking.BitPackingReader;
 import org.apache.parquet.column.values.bitpacking.BitPacking.BitPackingWriter;
@@ -46,6 +46,24 @@ public class TestByteBitPacking {
       Assert.assertArrayEquals("width "+i, values, unpacked);
     }
   }
+  
+  @Test
+  public void testPackUnPackLong() {
+    LOG.debug("");
+    LOG.debug("testPackUnPackLong");
+    for (int i = 1; i < 64; i++) {
+      LOG.debug("Width: " + i);
+      long[] unpacked32 = new long[32];
+      long[] unpacked8 = new long[32];
+      long[] values = generateValuesLong(i);
+      packUnpack32(Packer.BIG_ENDIAN.newBytePackerForLong(i), values, unpacked32);
+      LOG.debug("Output 32: " + TestBitPacking.toString(unpacked32));
+      Assert.assertArrayEquals("width "+i, values, unpacked32);
+      packUnpack8(Packer.BIG_ENDIAN.newBytePackerForLong(i), values, unpacked8);
+      LOG.debug("Output 8: " + TestBitPacking.toString(unpacked8));
+      Assert.assertArrayEquals("width "+i, values, unpacked8);
+    }
+  }
 
   private void packUnpack(BytePacker packer, int[] values, int[] unpacked) {
     byte[] packed = new byte[packer.getBitWidth() * 4];
@@ -54,6 +72,24 @@ public class TestByteBitPacking {
     packer.unpack32Values(ByteBuffer.wrap(packed), 0, unpacked, 0);
   }
 
+  private void packUnpack32(BytePackerForLong packer, long[] values, long[] unpacked) {
+    byte[] packed = new byte[packer.getBitWidth() * 4];
+    packer.pack32Values(values, 0, packed, 0);
+    LOG.debug("packed: " + TestBitPacking.toString(packed));
+    packer.unpack32Values(packed, 0, unpacked, 0);
+  }
+
+  private void packUnpack8(BytePackerForLong packer, long[] values, long[] unpacked) {
+    byte[] packed = new byte[packer.getBitWidth() * 4];
+    for (int i = 0; i < 4; i++) {
+      packer.pack8Values(values,  8 * i, packed, packer.getBitWidth() * i);
+    }
+    LOG.debug("packed: " + TestBitPacking.toString(packed));
+    for (int i = 0; i < 4; i++) {
+      packer.unpack8Values(packed, packer.getBitWidth() * i, unpacked, 8 * i);
+    }
+  }
+
   private int[] generateValues(int bitWidth) {
     int[] values = new int[32];
     for (int j = 0; j < values.length; j++) {
@@ -63,6 +99,16 @@ public class TestByteBitPacking {
     return values;
   }
 
+  private long[] generateValuesLong(int bitWidth) {
+    long[] values = new long[32];
+    Random random = new Random(0);
+    for (int j = 0; j < values.length; j++) {
+      values[j] = random.nextLong() & ((1l << bitWidth) - 1l);
+    }
+    LOG.debug("Input:  " + TestBitPacking.toString(values));
+    return values;
+  }
+
   @Test
   public void testPackUnPackAgainstHandWritten() throws IOException {
     LOG.debug("");

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/8bcfe6c5/parquet-generator/src/main/java/org/apache/parquet/encoding/bitpacking/ByteBasedBitPackingGenerator.java
----------------------------------------------------------------------
diff --git a/parquet-generator/src/main/java/org/apache/parquet/encoding/bitpacking/ByteBasedBitPackingGenerator.java b/parquet-generator/src/main/java/org/apache/parquet/encoding/bitpacking/ByteBasedBitPackingGenerator.java
index 3d182e2..b4868e9 100644
--- a/parquet-generator/src/main/java/org/apache/parquet/encoding/bitpacking/ByteBasedBitPackingGenerator.java
+++ b/parquet-generator/src/main/java/org/apache/parquet/encoding/bitpacking/ByteBasedBitPackingGenerator.java
@@ -27,23 +27,40 @@ import java.io.IOException;
  * This class generates bit packers that pack the most significant bit first.
  * The result of the generation is checked in. To regenerate the code run this class and check in the result.
  *
- * TODO: remove the unnecessary masks for perf
- *
  * @author Julien Le Dem
  *
  */
 public class ByteBasedBitPackingGenerator {
 
-  private static final String CLASS_NAME_PREFIX = "ByteBitPacking";
-  private static final int PACKER_COUNT = 32;
+  private static final String CLASS_NAME_PREFIX_FOR_INT = "ByteBitPacking";
+  private static final String CLASS_NAME_PREFIX_FOR_LONG = "ByteBitPackingForLong";
+  private static final String VARIABLE_TYPE_FOR_INT = "int";
+  private static final String VARIABLE_TYPE_FOR_LONG = "long";
+  private static final int MAX_BITS_FOR_INT = 32;
+  private static final int MAX_BITS_FOR_LONG = 64;
 
   public static void main(String[] args) throws Exception {
     String basePath = args[0];
-    generateScheme(CLASS_NAME_PREFIX + "BE", true, basePath);
-    generateScheme(CLASS_NAME_PREFIX + "LE", false, basePath);
+    // Int for Big Endian
+    generateScheme(false, true, basePath);
+
+    // Int for Little Endian
+    generateScheme(false, false, basePath);
+
+    // Long for Big Endian
+    generateScheme(true, true, basePath);
+
+    // Long for Little Endian
+    generateScheme(true, false, basePath);
   }
 
-  private static void generateScheme(String className, boolean msbFirst, String basePath)
throws IOException {
+  private static void generateScheme(boolean isLong, boolean msbFirst, 
+      String basePath) throws IOException {
+    String baseClassName = isLong ? CLASS_NAME_PREFIX_FOR_LONG : CLASS_NAME_PREFIX_FOR_INT;
+    String className = msbFirst ? (baseClassName + "BE") : (baseClassName + "LE");
+    int maxBits = isLong ? MAX_BITS_FOR_LONG : MAX_BITS_FOR_INT;
+    String nameSuffix = isLong ? "ForLong" : "";
+    
     final File file = new File(basePath + "/org/apache/parquet/column/values/bitpacking/"
+ className + ".java").getAbsoluteFile();
     if (!file.getParentFile().exists()) {
       file.getParentFile().mkdirs();
@@ -65,48 +82,58 @@ public class ByteBasedBitPackingGenerator {
     fw.append(" */\n");
     fw.append("public abstract class " + className + " {\n");
     fw.append("\n");
-    fw.append("  private static final BytePacker[] packers = new BytePacker[33];\n");
+    fw.append("  private static final BytePacker" + nameSuffix + "[] packers = new BytePacker"
+ nameSuffix + "[" + (maxBits + 1) + "];\n");
     fw.append("  static {\n");
-    for (int i = 0; i <= PACKER_COUNT; i++) {
+    for (int i = 0; i <= maxBits; i++) {
       fw.append("    packers[" + i + "] = new Packer" + i + "();\n");
     }
     fw.append("  }\n");
     fw.append("\n");
-    fw.append("  public static final BytePackerFactory factory = new BytePackerFactory()
{\n");
-    fw.append("    public BytePacker newBytePacker(int bitWidth) {\n");
+    fw.append("  public static final BytePacker" + nameSuffix + "Factory factory = new BytePacker"
+ nameSuffix + "Factory() {\n");
+    fw.append("    public BytePacker" + nameSuffix + " newBytePacker" + nameSuffix + "(int
bitWidth) {\n");
     fw.append("      return packers[bitWidth];\n");
     fw.append("    }\n");
     fw.append("  };\n");
     fw.append("\n");
-    for (int i = 0; i <= PACKER_COUNT; i++) {
-      generateClass(fw, i, msbFirst);
+    for (int i = 0; i <= maxBits; i++) {
+      generateClass(fw, i, isLong, msbFirst);
       fw.append("\n");
     }
     fw.append("}\n");
     fw.close();
   }
 
-  private static void generateClass(FileWriter fw, int bitWidth, boolean msbFirst) throws
IOException {
-    fw.append("  private static final class Packer" + bitWidth + " extends BytePacker {\n");
+  private static void generateClass(FileWriter fw, int bitWidth, boolean isLong, boolean
msbFirst) throws IOException {
+    String nameSuffix = isLong ? "ForLong" : "";
+    fw.append("  private static final class Packer" + bitWidth + " extends BytePacker" +
nameSuffix + " {\n");
     fw.append("\n");
     fw.append("    private Packer" + bitWidth + "() {\n");
     fw.append("      super("+bitWidth+");\n");
     fw.append("    }\n");
     fw.append("\n");
     // Packing
-    generatePack(fw, bitWidth, 1, msbFirst);
-    generatePack(fw, bitWidth, 4, msbFirst);
+    generatePack(fw, bitWidth, 1, isLong, msbFirst);
+    generatePack(fw, bitWidth, 4, isLong, msbFirst);
 
     // Unpacking
-    generateUnpack(fw, bitWidth, 1, msbFirst, true);
-    generateUnpack(fw, bitWidth, 1, msbFirst, false);
-    generateUnpack(fw, bitWidth, 4, msbFirst, true);
-    generateUnpack(fw, bitWidth, 4, msbFirst, false);
+    generateUnpack(fw, bitWidth, 1, isLong, msbFirst, true);
+    generateUnpack(fw, bitWidth, 1, isLong, msbFirst, false);
+    generateUnpack(fw, bitWidth, 4, isLong, msbFirst, true);
+    generateUnpack(fw, bitWidth, 4, isLong, msbFirst, false);
 
     fw.append("  }\n");
   }
-
-  private static int getShift(FileWriter fw, int bitWidth, boolean msbFirst,
+  
+  private static class ShiftMask {
+    ShiftMask(int shift, long mask) {
+      this.shift = shift;
+      this.mask = mask;
+    }
+    public int shift;
+    public long mask;
+  }
+  
+  private static ShiftMask getShift(FileWriter fw, int bitWidth, boolean isLong, boolean
msbFirst,
       int byteIndex, int valueIndex) throws IOException {
     // relative positions of the start and end of the value to the start and end of the byte
     int valueStartBitIndex = (valueIndex * bitWidth) - (8 * (byteIndex));
@@ -120,6 +147,7 @@ public class ByteBasedBitPackingGenerator {
     int byteEndBitWanted;
 
     int shift;
+    int widthWanted;
 
     if (msbFirst) {
       valueStartBitWanted = valueStartBitIndex < 0 ? bitWidth - 1 + valueStartBitIndex
: bitWidth - 1;
@@ -127,13 +155,17 @@ public class ByteBasedBitPackingGenerator {
       byteStartBitWanted = valueStartBitIndex < 0 ? 8 : 7 - valueStartBitIndex;
       byteEndBitWanted = valueEndBitIndex > 0 ? 0 : -valueEndBitIndex;
       shift = valueEndBitWanted - byteEndBitWanted;
+      widthWanted = Math.min(7, byteStartBitWanted) - Math.min(7, byteEndBitWanted) + 1;
     } else {
       valueStartBitWanted = bitWidth - 1 - (valueEndBitIndex > 0 ? valueEndBitIndex :
0);
       valueEndBitWanted = bitWidth - 1 - (valueStartBitIndex < 0 ? bitWidth - 1 + valueStartBitIndex
: bitWidth - 1);
       byteStartBitWanted = 7 - (valueEndBitIndex > 0 ? 0 : -valueEndBitIndex);
       byteEndBitWanted = 7 - (valueStartBitIndex < 0 ? 8 : 7 - valueStartBitIndex);
       shift = valueStartBitWanted - byteStartBitWanted;
+      widthWanted = Math.max(0, byteStartBitWanted) - Math.max(0, byteEndBitWanted) + 1;
     }
+    
+    int maskWidth = widthWanted + Math.max(0, shift);
 
     visualizeAlignment(
         fw, bitWidth, valueEndBitIndex,
@@ -141,7 +173,7 @@ public class ByteBasedBitPackingGenerator {
         byteStartBitWanted, byteEndBitWanted,
         shift
         );
-    return shift;
+    return new ShiftMask(shift, genMask(maskWidth, isLong));
   }
 
   private static void visualizeAlignment(FileWriter fw, int bitWidth,
@@ -177,9 +209,11 @@ public class ByteBasedBitPackingGenerator {
     fw.append("           ");
   }
 
-  private static void generatePack(FileWriter fw, int bitWidth, int batch, boolean msbFirst)
throws IOException {
-    int mask = genMask(bitWidth);
-    fw.append("    public final void pack" + (batch * 8) + "Values(final int[] in, final
int inPos, final byte[] out, final int outPos) {\n");
+  private static void generatePack(FileWriter fw, int bitWidth, int batch, boolean isLong,
boolean msbFirst) throws IOException {
+    long mask = genMask(bitWidth, isLong);
+    String maskSuffix = isLong ? "L" : "";
+    String variableType = isLong ? VARIABLE_TYPE_FOR_LONG : VARIABLE_TYPE_FOR_INT;
+    fw.append("    public final void pack" + (batch * 8) + "Values(final " + variableType
+ "[] in, final int inPos, final byte[] out, final int outPos) {\n");
     for (int byteIndex = 0; byteIndex < bitWidth * batch; ++byteIndex) {
       fw.append("      out[" + align(byteIndex, 2) + " + outPos] = (byte)((\n");
       int startIndex = (byteIndex * 8) / bitWidth;
@@ -191,32 +225,31 @@ public class ByteBasedBitPackingGenerator {
         } else {
           fw.append("\n        | ");
         }
-        int shift = getShift(fw, bitWidth, msbFirst, byteIndex, valueIndex);
+        ShiftMask shiftMask = getShift(fw, bitWidth, isLong, msbFirst, byteIndex, valueIndex);
 
         String shiftString = ""; // used when shift == 0
-        if (shift > 0) {
-          shiftString = " >>> " + shift;
-        } else if (shift < 0) {
-          shiftString = " <<  " + ( - shift);
+        if (shiftMask.shift > 0) {
+          shiftString = " >>> " + shiftMask.shift;
+        } else if (shiftMask.shift < 0) {
+          shiftString = " <<  " + ( - shiftMask.shift);
         }
-        fw.append("((in[" + align(valueIndex, 2) + " + inPos] & " + mask + ")" + shiftString
+ ")");
+        fw.append("((in[" + align(valueIndex, 2) + " + inPos] & " + mask + maskSuffix
+ ")" + shiftString + ")");
       }
       fw.append(") & 255);\n");
     }
     fw.append("    }\n");
   }
 
-  private static void generateUnpack(FileWriter fw, int bitWidth, int batch, boolean msbFirst,
boolean useByteArray)
+  private static void generateUnpack(FileWriter fw, int bitWidth, int batch, boolean isLong,
boolean msbFirst, boolean useByteArray)
       throws IOException {
-    final String bufferDataType;
-    if (useByteArray) {
-      bufferDataType = "byte[]";
-    } else {
-      bufferDataType = "ByteBuffer";
-    }
-    fw.append("    public final void unpack" + (batch * 8) + "Values(final " + bufferDataType
+ " in, final int inPos, final int[] out, final int outPos) {\n");
+    final String variableType = isLong ? VARIABLE_TYPE_FOR_LONG : VARIABLE_TYPE_FOR_INT;
+    final String bufferDataType = useByteArray ? "byte[]" : "ByteBuffer";
+    
+    fw.append("    public final void unpack" + (batch * 8) + "Values(final " + bufferDataType
+ " in, "
+        + "final int inPos, final " + variableType + "[] out, final int outPos) {\n");
+
     if (bitWidth > 0) {
-      int mask = genMask(bitWidth);
+      String maskSuffix = isLong ? "L" : "";
       for (int valueIndex = 0; valueIndex < (batch * 8); ++valueIndex) {
         fw.append("      out[" + align(valueIndex, 2) + " + outPos] =\n");
 
@@ -229,14 +262,16 @@ public class ByteBasedBitPackingGenerator {
           } else {
             fw.append("\n        | ");
           }
-          int shift = getShift(fw, bitWidth, msbFirst, byteIndex, valueIndex);
+          
+          ShiftMask shiftMask = getShift(fw, bitWidth, isLong, msbFirst, byteIndex, valueIndex);
 
           String shiftString = ""; // when shift == 0
-          if (shift < 0) {
-            shiftString = ">>>  " + (-shift);
-          } else if (shift > 0){
-            shiftString = "<<  " + shift;
+          if (shiftMask.shift < 0) {
+            shiftString = ">>  " + (-shiftMask.shift);
+          } else if (shiftMask.shift > 0){
+            shiftString = "<<  " + shiftMask.shift;
           }
+
           final String byteAccess;
           if (useByteArray) {
             byteAccess = "in[" + align(byteIndex, 2) + " + inPos]";
@@ -244,7 +279,10 @@ public class ByteBasedBitPackingGenerator {
             // use ByteBuffer#get(index) method
             byteAccess = "in.get(" + align(byteIndex, 2) + " + inPos)";
           }
-          fw.append(" (((((int)" + byteAccess + ") & 255) " + shiftString + ") &
" + mask + ")");
+
+          // Shift the wanted bits to the least significant position and mask them knowing how many bits to get.
+          fw.append(" ((((" + variableType + ")" + byteAccess + ") " + shiftString +
+              ") & " + shiftMask.mask + maskSuffix + ")");
         }
         fw.append(";\n");
       }
@@ -252,8 +290,14 @@ public class ByteBasedBitPackingGenerator {
     fw.append("    }\n");
   }
 
-  private static int genMask(int bitWidth) {
-    int mask = 0;
+  private static long genMask(int bitWidth, boolean isLong) {
+    int maxBitWidth = isLong ? MAX_BITS_FOR_LONG : MAX_BITS_FOR_INT;
+    if (bitWidth >= maxBitWidth) {
+      // -1 is always ones (11111...1111). It covers all it can possibly can.
+      return -1;
+    }
+    
+    long mask = 0;
     for (int i = 0; i < bitWidth; i++) {
       mask <<= 1;
       mask |= 1;


Mime
View raw message