parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ziva...@apache.org
Subject [parquet-mr] branch parquet-1.8.x updated: Revert "PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder"
Date Tue, 24 Apr 2018 13:19:40 GMT
This is an automated email from the ASF dual-hosted git repository.

zivanfi pushed a commit to branch parquet-1.8.x
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/parquet-1.8.x by this push:
     new 3db8547  Revert "PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder"
3db8547 is described below

commit 3db8547dc21a9d68e166f55dc877af4b2455bd83
Author: Gabor Szadovszky <gabor.szadovszky@cloudera.com>
AuthorDate: Mon Apr 23 13:31:56 2018 +0200

    Revert "PARQUET-852: Slowly ramp up sizes of byte[] in ByteBasedBitPackingEncoder"
    
    Reverting this change as it is a minor improvement and shall not be part of a maintenance release.
    
    This reverts commit d59b32a9120ad40e2a9f6651b680e84dae1747a6.
---
 .../bitpacking/ByteBasedBitPackingEncoder.java     | 30 ++++++----------------
 .../bitpacking/TestByteBasedBitPackingEncoder.java | 18 +++++--------
 2 files changed, 14 insertions(+), 34 deletions(-)

diff --git a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
index 0bc8b30..cc23e8f 100644
--- a/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
+++ b/parquet-encoding/src/main/java/org/apache/parquet/column/values/bitpacking/ByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/*
+/* 
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
+ * 
  *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -39,14 +39,11 @@ public class ByteBasedBitPackingEncoder {
   private static final Logger LOG = LoggerFactory.getLogger(ByteBasedBitPackingEncoder.class);
 
   private static final int VALUES_WRITTEN_AT_A_TIME = 8;
-  private static final int MAX_SLAB_SIZE_MULT = 64 * 1024;
-  private static final int INITIAL_SLAB_SIZE_MULT = 1024;
 
   private final int bitWidth;
   private final BytePacker packer;
   private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
-  private int slabSize;
-  private long totalFullSlabSize;
+  private final int slabSize;
   private int inputSize;
   private byte[] packed;
   private int packedPosition;
@@ -59,9 +56,8 @@ public class ByteBasedBitPackingEncoder {
   public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
     this.bitWidth = bitWidth;
     this.inputSize = 0;
-    this.totalFullSlabSize = 0;
     // must be a multiple of bitWidth
-    this.slabSize = (bitWidth == 0) ? 1 : (bitWidth * INITIAL_SLAB_SIZE_MULT);
+    this.slabSize = bitWidth * 64 * 1024;
     initPackedSlab();
     this.packer = packer.newBytePacker(bitWidth);
   }
@@ -79,10 +75,6 @@ public class ByteBasedBitPackingEncoder {
       pack();
       if (packedPosition == slabSize) {
         slabs.add(BytesInput.from(packed));
-        totalFullSlabSize += slabSize;
-        if (slabSize < bitWidth * MAX_SLAB_SIZE_MULT) {
-          slabSize *= 2;
-        }
         initPackedSlab();
       }
     }
@@ -107,7 +99,7 @@ public class ByteBasedBitPackingEncoder {
   public BytesInput toBytes() throws IOException {
     int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
 
-    LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
+    LOG.debug("writing {} bytes", (slabs.size() * slabSize + packedByteLength));
     if (inputSize > 0) {
       for (int i = inputSize; i < input.length; i++) {
         input[i] = 0;
@@ -121,24 +113,18 @@ public class ByteBasedBitPackingEncoder {
    * @return size of the data as it would be written
    */
   public long getBufferSize() {
-    return BytesUtils.paddedByteCountFromBits((totalValues + inputSize) * bitWidth);
+    return BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
   }
 
   /**
    * @return total memory allocated
    */
   public long getAllocatedSize() {
-    return totalFullSlabSize + packed.length + input.length * 4;
+    return (slabs.size() * slabSize) + packed.length + input.length * 4;
   }
 
   public String memUsageString(String prefix) {
     return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix, slabs.size(), getAllocatedSize());
   }
 
-  /**
-   * @return number of full slabs along with the current slab (debug aid)
-   */
-  int getNumSlabs() {
-    return slabs.size() + 1;
-  }
 }
diff --git a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
index b49595b..293b961 100644
--- a/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
+++ b/parquet-encoding/src/test/java/org/apache/parquet/column/values/bitpacking/TestByteBasedBitPackingEncoder.java
@@ -1,4 +1,4 @@
-/*
+/* 
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
+ * 
  *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,28 +18,22 @@
  */
 package org.apache.parquet.column.values.bitpacking;
 
-import org.apache.parquet.bytes.BytesUtils;
 import org.junit.Test;
 
-import static org.junit.Assert.assertEquals;
-
 public class TestByteBasedBitPackingEncoder {
 
   @Test
   public void testSlabBoundary() {
-    for (int i = 0; i <= 32; i++) {
+    for (int i = 0; i < 32; i++) {
       final ByteBasedBitPackingEncoder encoder = new ByteBasedBitPackingEncoder(i, Packer.BIG_ENDIAN);
-      // make sure to write through the progression of slabs
-      final int totalValues = 191 * 1024 * 8 + 10;
-      for (int j = 0; j < totalValues; j++) {
+      // make sure to write more than a slab
+      for (int j = 0; j < 64 * 1024 * 32 + 10; j++) {
         try {
           encoder.writeInt(j);
         } catch (Exception e) {
           throw new RuntimeException(i + ": error writing " + j, e);
         }
       }
-      assertEquals(BytesUtils.paddedByteCountFromBits(totalValues * i), encoder.getBufferSize());
-      assertEquals(i == 0 ? 1 : 9, encoder.getNumSlabs());
     }
   }
 

-- 
To stop receiving notification emails like this one, please contact
zivanfi@apache.org.

Mime
View raw message