parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [27/51] [partial] parquet-mr git commit: PARQUET-23: Rename to org.apache.parquet.
Date Mon, 27 Apr 2015 23:12:24 GMT
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/schema/Types.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/Types.java b/parquet-column/src/main/java/parquet/schema/Types.java
deleted file mode 100644
index 7e95110..0000000
--- a/parquet-column/src/main/java/parquet/schema/Types.java
+++ /dev/null
@@ -1,668 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.schema;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import parquet.Preconditions;
-import parquet.schema.PrimitiveType.PrimitiveTypeName;
-import parquet.schema.Type.ID;
-
-/**
- * This class provides fluent builders that produce Parquet schema Types.
- * <p>
- * The most basic use is to build primitive types:
- * <pre>
- *   Types.required(INT64).named("id");
- *   Types.optional(INT32).named("number");
- * </pre>
- * <p>
- * The {@link #required(PrimitiveTypeName)} factory method produces a primitive
- * type builder, and the {@link PrimitiveBuilder#named(String)} builds the
- * {@link PrimitiveType}. Between {@code required} and {@code named}, other
- * builder methods can be used to add type annotations or other type metadata:
- * <pre>
- *   Types.required(BINARY).as(UTF8).named("username");
- *   Types.optional(FIXED_LEN_BYTE_ARRAY).length(20).named("sha1");
- * </pre>
- * <p>
- * Optional types are built using {@link #optional(PrimitiveTypeName)} to get
- * the builder.
- * <p>
- * Groups are built similarly, using {@code requiredGroup()} (or the optional
- * version) to return a group builder. Group builders provide {@code required}
- * and {@code optional} to add primitive types, which return primitive builders
- * like the versions above.
- * <pre>
- *   // This produces:
- *   // required group User {
- *   //   required int64 id;
- *   //   optional binary email (UTF8);
- *   // }
- *   Types.requiredGroup()
- *            .required(INT64).named("id")
- *            .required(BINARY).as(UTF8).named("email")
- *        .named("User")
- * </pre>
- * <p>
- * When {@code required} is called on a group builder, the builder it returns
- * will add the type to the parent group when it is built and {@code named} will
- * return its parent group builder (instead of the type) so more fields can be
- * added.
- * <p>
- * Sub-groups can be created using {@code requiredGroup()} to get a group
- * builder that will create the group type, add it to the parent builder, and
- * return the parent builder for more fields.
- * <pre>
- *   // required group User {
- *   //   required int64 id;
- *   //   optional binary email (UTF8);
- *   //   optional group address {
- *   //     required binary street (UTF8);
- *   //     required int32 zipcode;
- *   //   }
- *   // }
- *   Types.requiredGroup()
- *            .required(INT64).named("id")
- *            .required(BINARY).as(UTF8).named("email")
- *            .optionalGroup()
- *                .required(BINARY).as(UTF8).named("street")
- *                .required(INT32).named("zipcode")
- *            .named("address")
- *        .named("User")
- * </pre>
- * <p>
- * Message types are built using {@link #buildMessage()} and function just like
- * group builders.
- * <pre>
- *   // message User {
- *   //   required int64 id;
- *   //   optional binary email (UTF8);
- *   //   optional group address {
- *   //     required binary street (UTF8);
- *   //     required int32 zipcode;
- *   //   }
- *   // }
- *   Types.buildMessage()
- *            .required(INT64).named("id")
- *            .required(BINARY).as(UTF8).named("email")
- *            .optionalGroup()
- *                .required(BINARY).as(UTF8).named("street")
- *                .required(INT32).named("zipcode")
- *            .named("address")
- *        .named("User")
- * </pre>
- * <p>
- * These builders enforce consistency checks based on the specifications in
- * the parquet-format documentation. For example, if DECIMAL is used to annotate
- * a FIXED_LEN_BYTE_ARRAY that is not long enough for its maximum precision,
- * these builders will throw an IllegalArgumentException:
- * <pre>
- *   // throws IllegalArgumentException with message:
- *   // "FIXED(4) is not long enough to store 10 digits"
- *   Types.required(FIXED_LEN_BYTE_ARRAY).length(4)
- *        .as(DECIMAL).precision(10)
- *        .named("badDecimal");
- * </pre>
- */
-public class Types {
-  private static final int NOT_SET = 0;
-
-  /**
-   * A base builder for {@link Type} objects.
-   *
-   * @param <P> The type that this builder will return from
-   *          {@link #named(String)} when the type is built.
-   */
-  public abstract static class Builder<T extends Builder, P> {
-    protected final P parent;
-    protected final Class<? extends P> returnClass;
-
-    protected Type.Repetition repetition = null;
-    protected OriginalType originalType = null;
-    protected Type.ID id = null;
-    private boolean repetitionAlreadySet = false;
-
-    /**
-     * Construct a type builder that returns a "parent" object when the builder
-     * is finished. The {@code parent} will be returned by
-     * {@link #named(String)} so that builders can be chained.
-     *
-     * @param parent a non-null object to return from {@link #named(String)}
-     */
-    protected Builder(P parent) {
-      Preconditions.checkNotNull(parent, "Parent cannot be null");
-      this.parent = parent;
-      this.returnClass = null;
-    }
-
-    /**
-     * Construct a type builder that returns the {@link Type} that was built
-     * when the builder is finished. The {@code returnClass} must be the
-     * expected {@code Type} class.
-     *
-     * @param returnClass a {@code Type} to return from {@link #named(String)}
-     */
-    protected Builder(Class<P> returnClass) {
-      Preconditions.checkArgument(Type.class.isAssignableFrom(returnClass),
-          "The requested return class must extend Type");
-      this.returnClass = returnClass;
-      this.parent = null;
-    }
-
-    protected abstract T self();
-
-    protected final T repetition(Type.Repetition repetition) {
-      Preconditions.checkArgument(!repetitionAlreadySet,
-          "Repetition has already been set");
-      Preconditions.checkNotNull(repetition, "Repetition cannot be null");
-      this.repetition = repetition;
-      this.repetitionAlreadySet = true;
-      return self();
-    }
-
-    /**
-     * Adds a type annotation ({@link OriginalType}) to the type being built.
-     * <p>
-     * Type annotations are used to extend the types that parquet can store, by
-     * specifying how the primitive types should be interpreted. This keeps the
-     * set of primitive types to a minimum and reuses parquet's efficient
-     * encodings. For example, strings are stored as byte arrays (binary) with
-     * a UTF8 annotation.
-     *
-     * @param type an {@code OriginalType}
-     * @return this builder for method chaining
-     */
-    public T as(OriginalType type) {
-      this.originalType = type;
-      return self();
-    }
-
-    /**
-     * adds an id annotation to the type being built.
-     * <p>
-     * ids are used to capture the original id when converting from models using ids (thrift, protobufs)
-     *
-     * @param id the id of the field
-     * @return this builder for method chaining
-     */
-    public T id(int id) {
-      this.id = new ID(id);
-      return self();
-    }
-
-    abstract protected Type build(String name);
-
-    /**
-     * Builds a {@link Type} and returns the parent builder, if given, or the
-     * {@code Type} that was built. If returning a parent object that is a
-     * GroupBuilder, the constructed type will be added to it as a field.
-     * <p>
-     * <em>Note:</em> Any configuration for this type builder should be done
-     * before calling this method.
-     *
-     * @param name a name for the constructed type
-     * @return the parent {@code GroupBuilder} or the constructed {@code Type}
-     */
-    public P named(String name) {
-      Preconditions.checkNotNull(name, "Name is required");
-      Preconditions.checkNotNull(repetition, "Repetition is required");
-
-      Type type = build(name);
-      if (parent != null) {
-        // if the parent is a GroupBuilder, add type to it
-        if (GroupBuilder.class.isAssignableFrom(parent.getClass())) {
-          GroupBuilder.class.cast(parent).addField(type);
-        }
-        return parent;
-      } else {
-        // no parent indicates that the Type object should be returned
-        // the constructor check guarantees that returnClass is a Type
-        return returnClass.cast(type);
-      }
-    }
-
-  }
-
-  /**
-   * A builder for {@link PrimitiveType} objects.
-   *
-   * @param <P> The type that this builder will return from
-   *          {@link #named(String)} when the type is built.
-   */
-  public static class PrimitiveBuilder<P> extends Builder<PrimitiveBuilder<P>, P> {
-    private static final long MAX_PRECISION_INT32 = maxPrecision(4);
-    private static final long MAX_PRECISION_INT64 = maxPrecision(8);
-    private final PrimitiveTypeName primitiveType;
-    private int length = NOT_SET;
-    private int precision = NOT_SET;
-    private int scale = NOT_SET;
-
-    private PrimitiveBuilder(P parent, PrimitiveTypeName type) {
-      super(parent);
-      this.primitiveType = type;
-    }
-
-    private PrimitiveBuilder(Class<P> returnType, PrimitiveTypeName type) {
-      super(returnType);
-      this.primitiveType = type;
-    }
-
-    @Override
-    protected PrimitiveBuilder<P> self() {
-      return this;
-    }
-
-    /**
-     * Adds the length for a FIXED_LEN_BYTE_ARRAY.
-     *
-     * @param length an int length
-     * @return this builder for method chaining
-     */
-    public PrimitiveBuilder<P> length(int length) {
-      this.length = length;
-      return this;
-    }
-
-    /**
-     * Adds the precision for a DECIMAL.
-     * <p>
-     * This value is required for decimals and must be less than or equal to
-     * the maximum number of base-10 digits in the underlying type. A 4-byte
-     * fixed, for example, can store up to 9 base-10 digits.
-     *
-     * @param precision an int precision value for the DECIMAL
-     * @return this builder for method chaining
-     */
-    public PrimitiveBuilder<P> precision(int precision) {
-      this.precision = precision;
-      return this;
-    }
-
-    /**
-     * Adds the scale for a DECIMAL.
-     * <p>
-     * This value must be less than the maximum precision of the type and must
-     * be a positive number. If not set, the default scale is 0.
-     * <p>
-     * The scale specifies the number of digits of the underlying unscaled
-     * that are to the right of the decimal point. The decimal interpretation
-     * of values in this column is: {@code value*10^(-scale)}.
-     *
-     * @param scale an int scale value for the DECIMAL
-     * @return this builder for method chaining
-     */
-    public PrimitiveBuilder<P> scale(int scale) {
-      this.scale = scale;
-      return this;
-    }
-
-    @Override
-    protected PrimitiveType build(String name) {
-      if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitiveType) {
-        Preconditions.checkArgument(length > 0,
-            "Invalid FIXED_LEN_BYTE_ARRAY length: " + length);
-      }
-
-      DecimalMetadata meta = decimalMetadata();
-
-      // validate type annotations and required metadata
-      if (originalType != null) {
-        switch (originalType) {
-          case UTF8:
-          case JSON:
-          case BSON:
-            Preconditions.checkState(
-                primitiveType == PrimitiveTypeName.BINARY,
-                originalType.toString() + " can only annotate binary fields");
-            break;
-          case DECIMAL:
-            Preconditions.checkState(
-                (primitiveType == PrimitiveTypeName.INT32) ||
-                (primitiveType == PrimitiveTypeName.INT64) ||
-                (primitiveType == PrimitiveTypeName.BINARY) ||
-                (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY),
-                "DECIMAL can only annotate INT32, INT64, BINARY, and FIXED"
-            );
-            if (primitiveType == PrimitiveTypeName.INT32) {
-              Preconditions.checkState(
-                  meta.getPrecision() <= MAX_PRECISION_INT32,
-                  "INT32 cannot store " + meta.getPrecision() + " digits " +
-                      "(max " + MAX_PRECISION_INT32 + ")");
-            } else if (primitiveType == PrimitiveTypeName.INT64) {
-              Preconditions.checkState(
-                  meta.getPrecision() <= MAX_PRECISION_INT64,
-                  "INT64 cannot store " + meta.getPrecision() + " digits " +
-                  "(max " + MAX_PRECISION_INT64 + ")");
-            } else if (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
-              Preconditions.checkState(
-                  meta.getPrecision() <= maxPrecision(length),
-                  "FIXED(" + length + ") cannot store " + meta.getPrecision() +
-                  " digits (max " + maxPrecision(length) + ")");
-            }
-            break;
-          case DATE:
-          case TIME_MILLIS:
-          case UINT_8:
-          case UINT_16:
-          case UINT_32:
-          case INT_8:
-          case INT_16:
-          case INT_32:
-            Preconditions.checkState(primitiveType == PrimitiveTypeName.INT32,
-                originalType.toString() + " can only annotate INT32");
-            break;
-          case TIMESTAMP_MILLIS:
-          case UINT_64:
-          case INT_64:
-            Preconditions.checkState(primitiveType == PrimitiveTypeName.INT64,
-                originalType.toString() + " can only annotate INT64");
-            break;
-          case INTERVAL:
-            Preconditions.checkState(
-                (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) &&
-                (length == 12),
-                "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)");
-            break;
-          case ENUM:
-            Preconditions.checkState(
-                primitiveType == PrimitiveTypeName.BINARY,
-                "ENUM can only annotate binary fields");
-            break;
-          default:
-            throw new IllegalStateException(originalType + " can not be applied to a primitive type");
-        }
-      }
-
-      return new PrimitiveType(repetition, primitiveType, length, name, originalType, meta, id);
-    }
-
-    private static long maxPrecision(int numBytes) {
-      return Math.round(                      // convert double to long
-          Math.floor(Math.log10(              // number of base-10 digits
-              Math.pow(2, 8 * numBytes - 1) - 1)  // max value stored in numBytes
-          )
-      );
-    }
-
-    protected DecimalMetadata decimalMetadata() {
-      DecimalMetadata meta = null;
-      if (OriginalType.DECIMAL == originalType) {
-        Preconditions.checkArgument(precision > 0,
-            "Invalid DECIMAL precision: " + precision);
-        Preconditions.checkArgument(scale >= 0,
-            "Invalid DECIMAL scale: " + scale);
-        Preconditions.checkArgument(scale <= precision,
-            "Invalid DECIMAL scale: cannot be greater than precision");
-        meta = new DecimalMetadata(precision, scale);
-      }
-      return meta;
-    }
-  }
-
-  /**
-   * A builder for {@link GroupType} objects.
-   *
-   * @param <P> The type that this builder will return from
-   *          {@link #named(String)} when the type is built.
-   */
-  public static class GroupBuilder<P> extends Builder<GroupBuilder<P>, P> {
-    protected final List<Type> fields;
-
-    private GroupBuilder(P parent) {
-      super(parent);
-      this.fields = new ArrayList<Type>();
-    }
-
-    private GroupBuilder(Class<P> returnType) {
-      super(returnType);
-      this.fields = new ArrayList<Type>();
-    }
-
-    @Override
-    protected GroupBuilder<P> self() {
-      return this;
-    }
-
-    public PrimitiveBuilder<GroupBuilder<P>> primitive(
-        PrimitiveTypeName type, Type.Repetition repetition) {
-      return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
-          .repetition(repetition);
-    }
-
-    /**
-     * Returns a {@link PrimitiveBuilder} for the required primitive type
-     * {@code type}.
-     *
-     * @param type a {@link PrimitiveTypeName}
-     * @return a primitive builder for {@code type} that will return this
-     *          builder for additional fields.
-     */
-    public PrimitiveBuilder<GroupBuilder<P>> required(
-        PrimitiveTypeName type) {
-      return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
-          .repetition(Type.Repetition.REQUIRED);
-    }
-
-    /**
-     * Returns a {@link PrimitiveBuilder} for the optional primitive type
-     * {@code type}.
-     *
-     * @param type a {@link PrimitiveTypeName}
-     * @return a primitive builder for {@code type} that will return this
-     *          builder for additional fields.
-     */
-    public PrimitiveBuilder<GroupBuilder<P>> optional(
-        PrimitiveTypeName type) {
-      return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
-          .repetition(Type.Repetition.OPTIONAL);
-    }
-
-    /**
-     * Returns a {@link PrimitiveBuilder} for the repeated primitive type
-     * {@code type}.
-     *
-     * @param type a {@link PrimitiveTypeName}
-     * @return a primitive builder for {@code type} that will return this
-     *          builder for additional fields.
-     */
-    public PrimitiveBuilder<GroupBuilder<P>> repeated(
-        PrimitiveTypeName type) {
-      return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
-          .repetition(Type.Repetition.REPEATED);
-    }
-
-    public GroupBuilder<GroupBuilder<P>> group(Type.Repetition repetition) {
-      return new GroupBuilder<GroupBuilder<P>>(this)
-          .repetition(repetition);
-    }
-
-    /**
-     * Returns a {@link GroupBuilder} to build a required sub-group.
-     *
-     * @return a group builder that will return this builder for additional
-     *          fields.
-     */
-    public GroupBuilder<GroupBuilder<P>> requiredGroup() {
-      return new GroupBuilder<GroupBuilder<P>>(this)
-          .repetition(Type.Repetition.REQUIRED);
-    }
-
-    /**
-     * Returns a {@link GroupBuilder} to build an optional sub-group.
-     *
-     * @return a group builder that will return this builder for additional
-     *          fields.
-     */
-    public GroupBuilder<GroupBuilder<P>> optionalGroup() {
-      return new GroupBuilder<GroupBuilder<P>>(this)
-          .repetition(Type.Repetition.OPTIONAL);
-    }
-
-    /**
-     * Returns a {@link GroupBuilder} to build a repeated sub-group.
-     *
-     * @return a group builder that will return this builder for additional
-     *          fields.
-     */
-    public GroupBuilder<GroupBuilder<P>> repeatedGroup() {
-      return new GroupBuilder<GroupBuilder<P>>(this)
-          .repetition(Type.Repetition.REPEATED);
-    }
-
-    /**
-     * Adds {@code type} as a sub-field to the group configured by this builder.
-     *
-     * @return this builder for additional fields.
-     */
-    public GroupBuilder<P> addField(Type type) {
-      fields.add(type);
-      return this;
-    }
-
-    /**
-     * Adds {@code types} as sub-fields of the group configured by this builder.
-     *
-     * @return this builder for additional fields.
-     */
-    public GroupBuilder<P> addFields(Type... types) {
-      for (Type type : types) {
-        fields.add(type);
-      }
-      return this;
-    }
-
-    @Override
-    protected GroupType build(String name) {
-      Preconditions.checkState(!fields.isEmpty(),
-          "Cannot build an empty group");
-      return new GroupType(repetition, name, originalType, fields, id);
-    }
-  }
-
-  public static class MessageTypeBuilder extends GroupBuilder<MessageType> {
-    private MessageTypeBuilder() {
-      super(MessageType.class);
-      repetition(Type.Repetition.REQUIRED);
-    }
-
-    /**
-     * Builds and returns the {@link MessageType} configured by this builder.
-     * <p>
-     * <em>Note:</em> All primitive types and sub-groups should be added before
-     * calling this method.
-     *
-     * @param name a name for the constructed type
-     * @return the final {@code MessageType} configured by this builder.
-     */
-    @Override
-    public MessageType named(String name) {
-      Preconditions.checkNotNull(name, "Name is required");
-      return new MessageType(name, fields);
-    }
-  }
-
-  /**
-   * Returns a builder to construct a {@link MessageType}.
-   *
-   * @return a {@link MessageTypeBuilder}
-   */
-  public static MessageTypeBuilder buildMessage() {
-    return new MessageTypeBuilder();
-  }
-
-  public static GroupBuilder<GroupType> buildGroup(
-      Type.Repetition repetition) {
-    return new GroupBuilder<GroupType>(GroupType.class).repetition(repetition);
-  }
-
-  /**
-   * Returns a builder to construct a required {@link GroupType}.
-   *
-   * @return a {@link GroupBuilder}
-   */
-  public static GroupBuilder<GroupType> requiredGroup() {
-    return new GroupBuilder<GroupType>(GroupType.class)
-        .repetition(Type.Repetition.REQUIRED);
-  }
-
-  /**
-   * Returns a builder to construct an optional {@link GroupType}.
-   *
-   * @return a {@link GroupBuilder}
-   */
-  public static GroupBuilder<GroupType> optionalGroup() {
-    return new GroupBuilder<GroupType>(GroupType.class)
-        .repetition(Type.Repetition.OPTIONAL);
-  }
-
-  /**
-   * Returns a builder to construct a repeated {@link GroupType}.
-   *
-   * @return a {@link GroupBuilder}
-   */
-  public static GroupBuilder<GroupType> repeatedGroup() {
-    return new GroupBuilder<GroupType>(GroupType.class)
-        .repetition(Type.Repetition.REPEATED);
-  }
-
-  public static PrimitiveBuilder<PrimitiveType> primitive(
-      PrimitiveTypeName type, Type.Repetition repetition) {
-    return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
-        .repetition(repetition);
-  }
-
-  /**
-   * Returns a builder to construct a required {@link PrimitiveType}.
-   *
-   * @param type a {@link PrimitiveTypeName} for the constructed type
-   * @return a {@link PrimitiveBuilder}
-   */
-  public static PrimitiveBuilder<PrimitiveType> required(
-      PrimitiveTypeName type) {
-    return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
-        .repetition(Type.Repetition.REQUIRED);
-  }
-
-  /**
-   * Returns a builder to construct an optional {@link PrimitiveType}.
-   *
-   * @param type a {@link PrimitiveTypeName} for the constructed type
-   * @return a {@link PrimitiveBuilder}
-   */
-  public static PrimitiveBuilder<PrimitiveType> optional(
-      PrimitiveTypeName type) {
-    return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
-        .repetition(Type.Repetition.OPTIONAL);
-  }
-
-  /**
-   * Returns a builder to construct a repeated {@link PrimitiveType}.
-   *
-   * @param type a {@link PrimitiveTypeName} for the constructed type
-   * @return a {@link PrimitiveBuilder}
-   */
-  public static PrimitiveBuilder<PrimitiveType> repeated(
-      PrimitiveTypeName type) {
-    return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
-        .repetition(Type.Repetition.REPEATED);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java b/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java
new file mode 100644
index 0000000..ebdbdf8
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java
@@ -0,0 +1,123 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.impl;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0;
+
+import java.util.List;
+
+import org.junit.Test;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.ColumnReader;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.mem.MemPageReader;
+import org.apache.parquet.column.page.mem.MemPageWriter;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.PrimitiveConverter;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+
+public class TestColumnReaderImpl {
+
+  private int rows = 13001;
+
+  private static final class ValidatingConverter extends PrimitiveConverter {
+    int count;
+
+    @Override
+    public void addBinary(Binary value) {
+      assertEquals("bar" + count % 10, value.toStringUsingUTF8());
+      ++ count;
+    }
+  }
+
+  @Test
+  public void test() {
+    MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
+    ColumnDescriptor col = schema.getColumns().get(0);
+    MemPageWriter pageWriter = new MemPageWriter();
+    ColumnWriterV2 columnWriterV2 = new ColumnWriterV2(col, pageWriter, new ParquetProperties(1024, PARQUET_2_0, true), 2048);
+    for (int i = 0; i < rows; i++) {
+      columnWriterV2.write(Binary.fromString("bar" + i % 10), 0, 0);
+      if ((i + 1) % 1000 == 0) {
+        columnWriterV2.writePage(i);
+      }
+    }
+    columnWriterV2.writePage(rows);
+    columnWriterV2.finalizeColumnChunk();
+    List<DataPage> pages = pageWriter.getPages();
+    int valueCount = 0;
+    int rowCount = 0;
+    for (DataPage dataPage : pages) {
+      valueCount += dataPage.getValueCount();
+      rowCount += ((DataPageV2)dataPage).getRowCount();
+    }
+    assertEquals(rows, rowCount);
+    assertEquals(rows, valueCount);
+    MemPageReader pageReader = new MemPageReader((long)rows, pages.iterator(), pageWriter.getDictionaryPage());
+    ValidatingConverter converter = new ValidatingConverter();
+    ColumnReader columnReader = new ColumnReaderImpl(col, pageReader, converter);
+    for (int i = 0; i < rows; i++) {
+      assertEquals(0, columnReader.getCurrentRepetitionLevel());
+      assertEquals(0, columnReader.getCurrentDefinitionLevel());
+      columnReader.writeCurrentValueToConverter();
+      columnReader.consume();
+    }
+    assertEquals(rows, converter.count);
+  }
+
+  @Test
+  public void testOptional() {
+    MessageType schema = MessageTypeParser.parseMessageType("message test { optional binary foo; }");
+    ColumnDescriptor col = schema.getColumns().get(0);
+    MemPageWriter pageWriter = new MemPageWriter();
+    ColumnWriterV2 columnWriterV2 = new ColumnWriterV2(col, pageWriter, new ParquetProperties(1024, PARQUET_2_0, true), 2048);
+    for (int i = 0; i < rows; i++) {
+      columnWriterV2.writeNull(0, 0);
+      if ((i + 1) % 1000 == 0) {
+        columnWriterV2.writePage(i);
+      }
+    }
+    columnWriterV2.writePage(rows);
+    columnWriterV2.finalizeColumnChunk();
+    List<DataPage> pages = pageWriter.getPages();
+    int valueCount = 0;
+    int rowCount = 0;
+    for (DataPage dataPage : pages) {
+      valueCount += dataPage.getValueCount();
+      rowCount += ((DataPageV2)dataPage).getRowCount();
+    }
+    assertEquals(rows, rowCount);
+    assertEquals(rows, valueCount);
+    MemPageReader pageReader = new MemPageReader((long)rows, pages.iterator(), pageWriter.getDictionaryPage());
+    ValidatingConverter converter = new ValidatingConverter();
+    ColumnReader columnReader = new ColumnReaderImpl(col, pageReader, converter);
+    for (int i = 0; i < rows; i++) {
+      assertEquals(0, columnReader.getCurrentRepetitionLevel());
+      assertEquals(0, columnReader.getCurrentDefinitionLevel());
+      columnReader.consume();
+    }
+    assertEquals(0, converter.count);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java
new file mode 100644
index 0000000..d801442
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java
@@ -0,0 +1,164 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.mem;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.ColumnReader;
+import org.apache.parquet.column.ColumnWriter;
+import org.apache.parquet.column.ParquetProperties.WriterVersion;
+import org.apache.parquet.column.impl.ColumnReadStoreImpl;
+import org.apache.parquet.column.impl.ColumnWriteStoreV1;
+import org.apache.parquet.column.page.mem.MemPageStore;
+import org.apache.parquet.example.DummyRecordConverter;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+
+public class TestMemColumn {
+  private static final Log LOG = Log.getLog(TestMemColumn.class);
+
+  @Test
+  public void testMemColumn() throws Exception {
+    MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
+    ColumnDescriptor path = schema.getColumnDescription(new String[] {"foo", "bar"});
+    MemPageStore memPageStore = new MemPageStore(10);
+    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
+    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
+    columnWriter.write(42l, 0, 0);
+    memColumnsStore.flush();
+
+    ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
+    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
+      assertEquals(columnReader.getCurrentRepetitionLevel(), 0);
+      assertEquals(columnReader.getCurrentDefinitionLevel(), 0);
+      assertEquals(columnReader.getLong(), 42);
+      columnReader.consume();
+    }
+  }
+
+  private ColumnWriter getColumnWriter(ColumnDescriptor path, MemPageStore memPageStore) {
+    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
+    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
+    return columnWriter;
+  }
+
+  private ColumnReader getColumnReader(MemPageStore memPageStore, ColumnDescriptor path, MessageType schema) {
+    return new ColumnReadStoreImpl(
+        memPageStore,
+        new DummyRecordConverter(schema).getRootConverter(),
+        schema
+        ).getColumnReader(path);
+  }
+
+  @Test
+  public void testMemColumnBinary() throws Exception {
+    MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
+    String[] col = new String[]{"foo", "bar"};
+    MemPageStore memPageStore = new MemPageStore(10);
+
+    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
+    ColumnDescriptor path1 = mt.getColumnDescription(col);
+    ColumnDescriptor path = path1;
+
+    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
+    columnWriter.write(Binary.fromString("42"), 0, 0);
+    memColumnsStore.flush();
+
+    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
+    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
+      assertEquals(columnReader.getCurrentRepetitionLevel(), 0);
+      assertEquals(columnReader.getCurrentDefinitionLevel(), 0);
+      assertEquals(columnReader.getBinary().toStringUsingUTF8(), "42");
+      columnReader.consume();
+    }
+  }
+
+  @Test
+  public void testMemColumnSeveralPages() throws Exception {
+    MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
+    String[] col = new String[]{"foo", "bar"};
+    MemPageStore memPageStore = new MemPageStore(10);
+    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
+    ColumnDescriptor path1 = mt.getColumnDescription(col);
+    ColumnDescriptor path = path1;
+
+    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
+    for (int i = 0; i < 2000; i++) {
+      columnWriter.write(42l, 0, 0);
+    }
+    memColumnsStore.flush();
+
+    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
+    for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
+      assertEquals(columnReader.getCurrentRepetitionLevel(), 0);
+      assertEquals(columnReader.getCurrentDefinitionLevel(), 0);
+      assertEquals(columnReader.getLong(), 42);
+      columnReader.consume();
+    }
+  }
+
+  @Test
+  public void testMemColumnSeveralPagesRepeated() throws Exception {
+    MessageType mt = MessageTypeParser.parseMessageType("message msg { repeated group foo { repeated int64 bar; } }");
+    String[] col = new String[]{"foo", "bar"};
+    MemPageStore memPageStore = new MemPageStore(10);
+    ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
+    ColumnDescriptor path1 = mt.getColumnDescription(col);
+    ColumnDescriptor path = path1;
+
+    ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
+    int[] rs = { 0, 0, 0, 1, 1, 1, 2, 2, 2};
+    int[] ds = { 0, 1, 2, 0, 1, 2, 0, 1, 2};
+    for (int i = 0; i < 837; i++) {
+      int r = rs[i % rs.length];
+      int d = ds[i % ds.length];
+      LOG.debug("write i: " + i);
+      if (d == 2) {
+        columnWriter.write((long)i, r, d);
+      } else {
+        columnWriter.writeNull(r, d);
+      }
+    }
+    memColumnsStore.flush();
+
+    ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
+    int i = 0;
+    for (int j = 0; j < columnReader.getTotalValueCount(); j++) {
+      int r = rs[i % rs.length];
+      int d = ds[i % ds.length];
+      LOG.debug("read i: " + i);
+      assertEquals("r row " + i, r, columnReader.getCurrentRepetitionLevel());
+      assertEquals("d row " + i, d, columnReader.getCurrentDefinitionLevel());
+      if (d == 2) {
+        assertEquals("data row " + i, (long)i, columnReader.getLong());
+      }
+      columnReader.consume();
+      ++ i;
+    }
+  }
+
+  private ColumnWriteStoreV1 newColumnWriteStoreImpl(MemPageStore memPageStore) {
+    return new ColumnWriteStoreV1(memPageStore, 2048, 2048, false, WriterVersion.PARQUET_1_0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java
new file mode 100644
index 0000000..8fcada6
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemPageStore.java
@@ -0,0 +1,61 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.mem;
+
+import static org.apache.parquet.column.Encoding.*;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.page.PageWriter;
+import org.apache.parquet.column.page.mem.MemPageStore;
+import org.apache.parquet.column.statistics.LongStatistics;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+
+public class TestMemPageStore {
+
+  private String[] path = { "foo", "bar"};
+
+  @Test
+  public void test() throws IOException {
+    MemPageStore memPageStore = new MemPageStore(10);
+    ColumnDescriptor col = new ColumnDescriptor(path , PrimitiveTypeName.INT64, 2, 2);
+    LongStatistics stats = new LongStatistics();
+    PageWriter pageWriter = memPageStore.getPageWriter(col);
+    pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
+    pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
+    pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
+    pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
+    PageReader pageReader = memPageStore.getPageReader(col);
+    long totalValueCount = pageReader.getTotalValueCount();
+    System.out.println(totalValueCount);
+    int total = 0;
+    do {
+      DataPage readPage = pageReader.readPage();
+      total += readPage.getValueCount();
+      System.out.println(readPage);
+      // TODO: assert
+    } while (total < totalValueCount);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageReader.java b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageReader.java
new file mode 100644
index 0000000..a6e8910
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageReader.java
@@ -0,0 +1,69 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.page.mem;
+
+import static org.apache.parquet.Log.DEBUG;
+import static org.apache.parquet.Preconditions.checkNotNull;
+
+import java.util.Iterator;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.io.ParquetDecodingException;
+
+
+public class MemPageReader implements PageReader {
+  private static final Log LOG = Log.getLog(MemPageReader.class);
+
+  private final long totalValueCount;
+  private final Iterator<DataPage> pages;
+  private final DictionaryPage dictionaryPage;
+
+  public MemPageReader(long totalValueCount, Iterator<DataPage> pages, DictionaryPage dictionaryPage) {
+    super();
+    checkNotNull(pages, "pages");
+    this.totalValueCount = totalValueCount;
+    this.pages = pages;
+    this.dictionaryPage = dictionaryPage;
+  }
+
+  @Override
+  public long getTotalValueCount() {
+    return totalValueCount;
+  }
+
+  @Override
+  public DataPage readPage() {
+    if (pages.hasNext()) {
+      DataPage next = pages.next();
+      if (DEBUG) LOG.debug("read page " + next);
+      return next;
+    } else {
+      throw new ParquetDecodingException("after last page");
+    }
+  }
+
+  @Override
+  public DictionaryPage readDictionaryPage() {
+    return dictionaryPage;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageStore.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageStore.java b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageStore.java
new file mode 100644
index 0000000..219e5cd
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageStore.java
@@ -0,0 +1,77 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.page.mem;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.UnknownColumnException;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.page.PageWriteStore;
+import org.apache.parquet.column.page.PageWriter;
+
+
+public class MemPageStore implements PageReadStore, PageWriteStore {
+  private static final Log LOG = Log.getLog(MemPageStore.class);
+
+  private Map<ColumnDescriptor, MemPageWriter> pageWriters = new HashMap<ColumnDescriptor, MemPageWriter>();
+
+  private long rowCount;
+
+  public MemPageStore(long rowCount) {
+    super();
+    this.rowCount = rowCount;
+  }
+
+  @Override
+  public PageWriter getPageWriter(ColumnDescriptor path) {
+    MemPageWriter pageWriter = pageWriters.get(path);
+    if (pageWriter == null) {
+      pageWriter = new MemPageWriter();
+      pageWriters.put(path, pageWriter);
+    }
+    return pageWriter;
+  }
+
+  @Override
+  public PageReader getPageReader(ColumnDescriptor descriptor) {
+    MemPageWriter pageWriter = pageWriters.get(descriptor);
+    if (pageWriter == null) {
+      throw new UnknownColumnException(descriptor);
+    }
+    List<DataPage> pages = new ArrayList<DataPage>(pageWriter.getPages());
+    if (Log.DEBUG) LOG.debug("initialize page reader with "+ pageWriter.getTotalValueCount() + " values and " + pages.size() + " pages");
+    return new MemPageReader(pageWriter.getTotalValueCount(), pages.iterator(), pageWriter.getDictionaryPage());
+  }
+
+  @Override
+  public long getRowCount() {
+    return rowCount;
+  }
+
+  public void addRowCount(long count) {
+    rowCount += count;
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java
new file mode 100644
index 0000000..d5bfe22
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java
@@ -0,0 +1,113 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.page.mem;
+
+import static org.apache.parquet.Log.DEBUG;
+import static org.apache.parquet.bytes.BytesInput.copy;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.PageWriter;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.io.ParquetEncodingException;
+
+public class MemPageWriter implements PageWriter {
+  private static final Log LOG = Log.getLog(MemPageWriter.class);
+
+  private final List<DataPage> pages = new ArrayList<DataPage>();
+  private DictionaryPage dictionaryPage;
+  private long memSize = 0;
+  private long totalValueCount = 0;
+
+  @Override
+  public void writePage(BytesInput bytesInput, int valueCount, Statistics statistics, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding)
+      throws IOException {
+    if (valueCount == 0) {
+      throw new ParquetEncodingException("illegal page of 0 values");
+    }
+    memSize += bytesInput.size();
+    pages.add(new DataPageV1(BytesInput.copy(bytesInput), valueCount, (int)bytesInput.size(), statistics, rlEncoding, dlEncoding, valuesEncoding));
+    totalValueCount += valueCount;
+    if (DEBUG) LOG.debug("page written for " + bytesInput.size() + " bytes and " + valueCount + " records");
+  }
+
+  @Override
+  public void writePageV2(int rowCount, int nullCount, int valueCount,
+      BytesInput repetitionLevels, BytesInput definitionLevels,
+      Encoding dataEncoding, BytesInput data, Statistics<?> statistics) throws IOException {
+    if (valueCount == 0) {
+      throw new ParquetEncodingException("illegal page of 0 values");
+    }
+    long size = repetitionLevels.size() + definitionLevels.size() + data.size();
+    memSize += size;
+    pages.add(DataPageV2.uncompressed(rowCount, nullCount, valueCount, copy(repetitionLevels), copy(definitionLevels), dataEncoding, copy(data), statistics));
+    totalValueCount += valueCount;
+    if (DEBUG) LOG.debug("page written for " + size + " bytes and " + valueCount + " records");
+
+  }
+
+  @Override
+  public long getMemSize() {
+    return memSize;
+  }
+
+  public List<DataPage> getPages() {
+    return pages;
+  }
+
+  public DictionaryPage getDictionaryPage() {
+    return dictionaryPage;
+  }
+
+  public long getTotalValueCount() {
+    return totalValueCount;
+  }
+
+  @Override
+  public long allocatedSize() {
+    // this store keeps only the bytes written
+    return memSize;
+  }
+
+  @Override
+  public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
+    if (this.dictionaryPage != null) {
+      throw new ParquetEncodingException("Only one dictionary page per block");
+    }
+    this.memSize += dictionaryPage.getBytes().size();
+    this.dictionaryPage = dictionaryPage.copy();
+    if (DEBUG) LOG.debug("dictionary page written for " + dictionaryPage.getBytes().size() + " bytes and " + dictionaryPage.getDictionarySize() + " records");
+  }
+
+  @Override
+  public String memUsageString(String prefix) {
+    return String.format("%s %,d bytes", prefix, memSize);
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
new file mode 100644
index 0000000..e0c6a10
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
@@ -0,0 +1,569 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.statistics;
+
+import static org.junit.Assert.*;
+
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+import org.apache.parquet.io.api.Binary;
+
+public class TestStatistics {
+  private int[] integerArray;
+  private long[] longArray;
+  private float[] floatArray;
+  private double[] doubleArray;
+  private String[] stringArray;
+  private boolean[] booleanArray;
+
+  @Test
+  public void testNumNulls() {
+    IntStatistics stats = new IntStatistics();
+    assertEquals(stats.getNumNulls(), 0);
+
+    stats.incrementNumNulls();
+    stats.incrementNumNulls();
+    stats.incrementNumNulls();
+    stats.incrementNumNulls();
+    assertEquals(stats.getNumNulls(), 4);
+
+    stats.incrementNumNulls(5);
+    assertEquals(stats.getNumNulls(), 9);
+
+    stats.setNumNulls(22);
+    assertEquals(stats.getNumNulls(), 22);
+  }
+
+  @Test
+  public void testIntMinMax() {
+    // Test basic max/min
+    integerArray = new int[] {1, 3, 14, 54, 66, 8, 0, 23, 54};
+    IntStatistics stats = new IntStatistics();
+
+    for (int i: integerArray) {
+      stats.updateStats(i);
+    }
+    assertEquals(stats.getMax(), 66);
+    assertEquals(stats.getMin(), 0);
+
+    // Test negative values
+    integerArray = new int[] {-11, 3, -14, 54, -66, 8, 0, -23, 54};
+    IntStatistics statsNeg = new IntStatistics();
+
+    for (int i: integerArray) {
+      statsNeg.updateStats(i);
+    }
+    assertEquals(statsNeg.getMax(), 54);
+    assertEquals(statsNeg.getMin(), -66);
+
+    // Test converting to and from byte[]
+    byte[] intMaxBytes = statsNeg.getMaxBytes();
+    byte[] intMinBytes = statsNeg.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(intMaxBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getInt(), 54);
+    assertEquals(ByteBuffer.wrap(intMinBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getInt(), -66);
+
+    IntStatistics statsFromBytes = new IntStatistics();
+    statsFromBytes.setMinMaxFromBytes(intMinBytes, intMaxBytes);
+
+    assertEquals(statsFromBytes.getMax(), 54);
+    assertEquals(statsFromBytes.getMin(), -66);
+
+    integerArray = new int[] {Integer.MAX_VALUE, Integer.MIN_VALUE};
+    IntStatistics minMaxValues = new IntStatistics();
+
+    for (int i: integerArray) {
+      minMaxValues.updateStats(i);
+    }
+    assertEquals(minMaxValues.getMax(), Integer.MAX_VALUE);
+    assertEquals(minMaxValues.getMin(), Integer.MIN_VALUE);
+
+    // Test converting to and from byte[] for large and small values
+    byte[] intMaxBytesMinMax = minMaxValues.getMaxBytes();
+    byte[] intMinBytesMinMax = minMaxValues.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(intMaxBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getInt(), Integer.MAX_VALUE);
+    assertEquals(ByteBuffer.wrap(intMinBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getInt(), Integer.MIN_VALUE);
+
+    IntStatistics statsFromBytesMinMax= new IntStatistics();
+    statsFromBytesMinMax.setMinMaxFromBytes(intMinBytesMinMax, intMaxBytesMinMax);
+
+    assertEquals(statsFromBytesMinMax.getMax(), Integer.MAX_VALUE);
+    assertEquals(statsFromBytesMinMax.getMin(), Integer.MIN_VALUE);
+
+    // Test print formatting
+    assertEquals(stats.toString(), "min: 0, max: 66, num_nulls: 0");
+  }
+
+  @Test
+  public void testLongMinMax() {
+    // Test basic max/min
+    longArray = new long[] {9, 39, 99, 3, 0, 12, 1000, 65, 542};
+    LongStatistics stats = new LongStatistics();
+
+    for (long l: longArray) {
+      stats.updateStats(l);
+    }
+    assertEquals(stats.getMax(), 1000);
+    assertEquals(stats.getMin(), 0);
+
+    // Test negative values
+    longArray = new long[] {-101, 993, -9914, 54, -9, 89, 0, -23, 90};
+    LongStatistics statsNeg = new LongStatistics();
+
+    for (long l: longArray) {
+      statsNeg.updateStats(l);
+    }
+    assertEquals(statsNeg.getMax(), 993);
+    assertEquals(statsNeg.getMin(), -9914);
+
+    // Test converting to and from byte[]
+    byte[] longMaxBytes = statsNeg.getMaxBytes();
+    byte[] longMinBytes = statsNeg.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(longMaxBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getLong(), 993);
+    assertEquals(ByteBuffer.wrap(longMinBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getLong(), -9914);
+
+    LongStatistics statsFromBytes = new LongStatistics();
+    statsFromBytes.setMinMaxFromBytes(longMinBytes, longMaxBytes);
+
+    assertEquals(statsFromBytes.getMax(), 993);
+    assertEquals(statsFromBytes.getMin(), -9914);
+
+    longArray = new long[] {Long.MAX_VALUE, Long.MIN_VALUE};
+    LongStatistics minMaxValues = new LongStatistics();
+
+    for (long l: longArray) {
+      minMaxValues.updateStats(l);
+    }
+    assertEquals(minMaxValues.getMax(), Long.MAX_VALUE);
+    assertEquals(minMaxValues.getMin(), Long.MIN_VALUE);
+
+    // Test converting to and from byte[] for large and small values
+    byte[] longMaxBytesMinMax = minMaxValues.getMaxBytes();
+    byte[] longMinBytesMinMax = minMaxValues.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(longMaxBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getLong(), Long.MAX_VALUE);
+    assertEquals(ByteBuffer.wrap(longMinBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getLong(), Long.MIN_VALUE);
+
+    LongStatistics statsFromBytesMinMax= new LongStatistics();
+    statsFromBytesMinMax.setMinMaxFromBytes(longMinBytesMinMax, longMaxBytesMinMax);
+
+    assertEquals(statsFromBytesMinMax.getMax(), Long.MAX_VALUE);
+    assertEquals(statsFromBytesMinMax.getMin(), Long.MIN_VALUE);
+
+    // Test print formatting
+    assertEquals(stats.toString(), "min: 0, max: 1000, num_nulls: 0");
+  }
+
+  @Test
+  public void testFloatMinMax() {
+    // Test basic max/min
+    floatArray = new float[] {1.5f, 44.5f, 412.99f, 0.65f, 5.6f, 100.6f, 0.0001f, 23.0f, 553.6f};
+    FloatStatistics stats = new FloatStatistics();
+
+    for (float f: floatArray) {
+      stats.updateStats(f);
+    }
+    assertEquals(stats.getMax(), 553.6f, 1e-10);
+    assertEquals(stats.getMin(), 0.0001f, 1e-10);
+
+    // Test negative values
+    floatArray = new float[] {-1.5f, -44.5f, -412.99f, 0.65f, -5.6f, -100.6f, 0.0001f, -23.0f, -3.6f};
+    FloatStatistics statsNeg = new FloatStatistics();
+
+    for (float f: floatArray) {
+      statsNeg.updateStats(f);
+    }
+    assertEquals(statsNeg.getMax(), 0.65f, 1e-10);
+    assertEquals(statsNeg.getMin(), -412.99f, 1e-10);
+
+    // Test converting to and from byte[]
+    byte[] floatMaxBytes = statsNeg.getMaxBytes();
+    byte[] floatMinBytes = statsNeg.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(floatMaxBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getFloat(), 0.65f, 1e-10);
+    assertEquals(ByteBuffer.wrap(floatMinBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getFloat(), -412.99f, 1e-10);
+
+    FloatStatistics statsFromBytes = new FloatStatistics();
+    statsFromBytes.setMinMaxFromBytes(floatMinBytes, floatMaxBytes);
+
+    assertEquals(statsFromBytes.getMax(), 0.65f, 1e-10);
+    assertEquals(statsFromBytes.getMin(), -412.99f, 1e-10);
+
+    floatArray = new float[] {Float.MAX_VALUE, Float.MIN_VALUE};
+    FloatStatistics minMaxValues = new FloatStatistics();
+
+    for (float f: floatArray) {
+      minMaxValues.updateStats(f);
+    }
+    assertEquals(minMaxValues.getMax(), Float.MAX_VALUE, 1e-10);
+    assertEquals(minMaxValues.getMin(), Float.MIN_VALUE, 1e-10);
+
+    // Test converting to and from byte[] for large and small values
+    byte[] floatMaxBytesMinMax = minMaxValues.getMaxBytes();
+    byte[] floatMinBytesMinMax = minMaxValues.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(floatMaxBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getFloat(), Float.MAX_VALUE, 1e-10);
+    assertEquals(ByteBuffer.wrap(floatMinBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getFloat(), Float.MIN_VALUE, 1e-10);
+
+    FloatStatistics statsFromBytesMinMax= new FloatStatistics();
+    statsFromBytesMinMax.setMinMaxFromBytes(floatMinBytesMinMax, floatMaxBytesMinMax);
+
+    assertEquals(statsFromBytesMinMax.getMax(), Float.MAX_VALUE, 1e-10);
+    assertEquals(statsFromBytesMinMax.getMin(), Float.MIN_VALUE, 1e-10);
+
+    // Test print formatting
+    assertEquals(stats.toString(), "min: 0.00010, max: 553.59998, num_nulls: 0");
+  }
+
+  @Test
+  public void testDoubleMinMax() {
+    // Test basic max/min
+    doubleArray = new double[] {81.5d, 944.5f, 2.002d, 334.5d, 5.6d, 0.001d, 0.00001d, 23.0d, 553.6d};
+    DoubleStatistics stats = new DoubleStatistics();
+
+    for (double d: doubleArray) {
+      stats.updateStats(d);
+    }
+    assertEquals(stats.getMax(), 944.5d, 1e-10);
+    assertEquals(stats.getMin(), 0.00001d, 1e-10);
+
+    // Test negative values
+    doubleArray = new double[] {-81.5d, -944.5d, 2.002d, -334.5d, -5.6d, -0.001d, -0.00001d, 23.0d, -3.6d};
+    DoubleStatistics statsNeg = new DoubleStatistics();
+
+    for (double d: doubleArray) {
+      statsNeg.updateStats(d);
+    }
+    assertEquals(statsNeg.getMax(), 23.0d, 1e-10);
+    assertEquals(statsNeg.getMin(), -944.5d, 1e-10);
+
+    // Test converting to and from byte[]
+    byte[] doubleMaxBytes = statsNeg.getMaxBytes();
+    byte[] doubleMinBytes = statsNeg.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(doubleMaxBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getDouble(), 23.0d, 1e-10);
+    assertEquals(ByteBuffer.wrap(doubleMinBytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).getDouble(), -944.5d, 1e-10);
+
+    DoubleStatistics statsFromBytes = new DoubleStatistics();
+    statsFromBytes.setMinMaxFromBytes(doubleMinBytes, doubleMaxBytes);
+
+    assertEquals(statsFromBytes.getMax(), 23.0d, 1e-10);
+    assertEquals(statsFromBytes.getMin(), -944.5d, 1e-10);
+
+    doubleArray = new double[] {Double.MAX_VALUE, Double.MIN_VALUE};
+    DoubleStatistics minMaxValues = new DoubleStatistics();
+
+    for (double d: doubleArray) {
+      minMaxValues.updateStats(d);
+    }
+    assertEquals(minMaxValues.getMax(), Double.MAX_VALUE, 1e-10);
+    assertEquals(minMaxValues.getMin(), Double.MIN_VALUE, 1e-10);
+
+    // Test converting to and from byte[] for large and small values
+    byte[] doubleMaxBytesMinMax = minMaxValues.getMaxBytes();
+    byte[] doubleMinBytesMinMax = minMaxValues.getMinBytes();
+
+    assertEquals(ByteBuffer.wrap(doubleMaxBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getDouble(), Double.MAX_VALUE, 1e-10);
+    assertEquals(ByteBuffer.wrap(doubleMinBytesMinMax).order(java.nio.ByteOrder.LITTLE_ENDIAN).getDouble(), Double.MIN_VALUE, 1e-10);
+
+    DoubleStatistics statsFromBytesMinMax= new DoubleStatistics();
+    statsFromBytesMinMax.setMinMaxFromBytes(doubleMinBytesMinMax, doubleMaxBytesMinMax);
+
+    assertEquals(statsFromBytesMinMax.getMax(), Double.MAX_VALUE, 1e-10);
+    assertEquals(statsFromBytesMinMax.getMin(), Double.MIN_VALUE, 1e-10);
+
+    // Test print formatting
+    assertEquals(stats.toString(), "min: 0.00001, max: 944.50000, num_nulls: 0");
+  }
+
+  @Test
+  public void testBooleanMinMax() {
+    // Test all true
+    booleanArray = new boolean[] {true, true, true};
+    BooleanStatistics statsTrue = new BooleanStatistics();
+
+    for (boolean i: booleanArray) {
+      statsTrue.updateStats(i);
+    }
+    assertTrue(statsTrue.getMax());
+    assertTrue(statsTrue.getMin());
+
+    // Test all false
+    booleanArray = new boolean[] {false, false, false};
+    BooleanStatistics statsFalse = new BooleanStatistics();
+
+    for (boolean i: booleanArray) {
+      statsFalse.updateStats(i);
+    }
+    assertFalse(statsFalse.getMax());
+    assertFalse(statsFalse.getMin());
+
+    booleanArray = new boolean[] {false, true, false};
+    BooleanStatistics statsBoth = new BooleanStatistics();
+
+    for (boolean i: booleanArray) {
+      statsBoth.updateStats(i);
+    }
+    assertTrue(statsBoth.getMax());
+    assertFalse(statsBoth.getMin());
+
+    // Test converting to and from byte[]
+    byte[] boolMaxBytes = statsBoth.getMaxBytes();
+    byte[] boolMinBytes = statsBoth.getMinBytes();
+
+    assertEquals((int)(boolMaxBytes[0] & 255), 1);
+    assertEquals((int)(boolMinBytes[0] & 255), 0);
+
+    BooleanStatistics statsFromBytes = new BooleanStatistics();
+    statsFromBytes.setMinMaxFromBytes(boolMinBytes, boolMaxBytes);
+
+    assertTrue(statsFromBytes.getMax());
+    assertFalse(statsFromBytes.getMin());
+
+    // Test print formatting
+    assertEquals(statsBoth.toString(), "min: false, max: true, num_nulls: 0");
+  }
+
+  @Test
+  public void testBinaryMinMax() {
+    //Test basic max/min
+    stringArray = new String[] {"hello", "world", "this", "is", "a", "test", "of", "the", "stats", "class"};
+    BinaryStatistics stats = new BinaryStatistics();
+
+    for (String s: stringArray) {
+      stats.updateStats(Binary.fromString(s));
+    }
+    assertEquals(stats.getMax(), Binary.fromString("world"));
+    assertEquals(stats.getMin(), Binary.fromString("a"));
+
+    // Test empty string
+    stringArray = new String[] {"", "", "", "", ""};
+    BinaryStatistics statsEmpty = new BinaryStatistics();
+
+    for (String s: stringArray) {
+      statsEmpty.updateStats(Binary.fromString(s));
+    }
+    assertEquals(statsEmpty.getMax(), Binary.fromString(""));
+    assertEquals(statsEmpty.getMin(), Binary.fromString(""));
+
+    // Test converting to and from byte[]
+    byte[] stringMaxBytes = stats.getMaxBytes();
+    byte[] stringMinBytes = stats.getMinBytes();
+
+    assertEquals(new String(stringMaxBytes), "world");
+    assertEquals(new String(stringMinBytes), "a");
+
+    BinaryStatistics statsFromBytes = new BinaryStatistics();
+    statsFromBytes.setMinMaxFromBytes(stringMinBytes, stringMaxBytes);
+
+    assertEquals(statsFromBytes.getMax(), Binary.fromString("world"));
+    assertEquals(statsFromBytes.getMin(), Binary.fromString("a"));
+
+    // Test print formatting
+    assertEquals(stats.toString(), "min: a, max: world, num_nulls: 0");
+  }
+
+  @Test
+  public void testMergingStatistics() {
+    testMergingIntStats();
+    testMergingLongStats();
+    testMergingFloatStats();
+    testMergingDoubleStats();
+    testMergingBooleanStats();
+    testMergingStringStats();
+  }
+
+  private void testMergingIntStats() {
+    integerArray = new int[] {1, 2, 3, 4, 5};
+    IntStatistics intStats = new IntStatistics();
+
+    for (int s: integerArray) {
+      intStats.updateStats(s);
+    }
+
+    integerArray = new int[] {0, 3, 3};
+    IntStatistics intStats2 = new IntStatistics();
+
+    for (int s: integerArray) {
+      intStats2.updateStats(s);
+    }
+    intStats.mergeStatistics(intStats2);
+    assertEquals(intStats.getMax(), 5);
+    assertEquals(intStats.getMin(), 0);
+
+    integerArray = new int[] {-1, -100, 100};
+    IntStatistics intStats3 = new IntStatistics();
+    for (int s: integerArray) {
+      intStats3.updateStats(s);
+    }
+    intStats.mergeStatistics(intStats3);
+
+    assertEquals(intStats.getMax(), 100);
+    assertEquals(intStats.getMin(), -100);
+  }
+
+  private void testMergingLongStats() {
+    longArray = new long[] {1l, 2l, 3l, 4l, 5l};
+    LongStatistics longStats = new LongStatistics();
+
+    for (long s: longArray) {
+      longStats.updateStats(s);
+    }
+
+    longArray = new long[] {0l, 3l, 3l};
+    LongStatistics longStats2 = new LongStatistics();
+
+    for (long s: longArray) {
+      longStats2.updateStats(s);
+    }
+    longStats.mergeStatistics(longStats2);
+    assertEquals(longStats.getMax(), 5l);
+    assertEquals(longStats.getMin(), 0l);
+
+    longArray = new long[] {-1l, -100l, 100l};
+    LongStatistics longStats3 = new LongStatistics();
+    for (long s: longArray) {
+      longStats3.updateStats(s);
+    }
+    longStats.mergeStatistics(longStats3);
+
+    assertEquals(longStats.getMax(), 100l);
+    assertEquals(longStats.getMin(), -100l);
+  }
+
+  private void testMergingFloatStats() {
+    floatArray = new float[] {1.44f, 12.2f, 98.3f, 1.4f, 0.05f};
+    FloatStatistics floatStats = new FloatStatistics();
+
+    for (float s: floatArray) {
+      floatStats.updateStats(s);
+    }
+
+    floatArray = new float[] {0.0001f, 9.9f, 3.1f};
+    FloatStatistics floatStats2 = new FloatStatistics();
+
+    for (float s: floatArray) {
+      floatStats2.updateStats(s);
+    }
+    floatStats.mergeStatistics(floatStats2);
+    assertEquals(floatStats.getMax(), 98.3f, 1e-10);
+    assertEquals(floatStats.getMin(), 0.0001f, 1e-10);
+
+    floatArray = new float[] {-1.91f, -100.9f, 100.54f};
+    FloatStatistics floatStats3 = new FloatStatistics();
+    for (float s: floatArray) {
+      floatStats3.updateStats(s);
+    }
+    floatStats.mergeStatistics(floatStats3);
+
+    assertEquals(floatStats.getMax(), 100.54f, 1e-10);
+    assertEquals(floatStats.getMin(), -100.9f, 1e-10);
+  }
+
+  private void testMergingDoubleStats() {
+    doubleArray = new double[] {1.44d, 12.2d, 98.3d, 1.4d, 0.05d};
+    DoubleStatistics doubleStats = new DoubleStatistics();
+
+    for (double s: doubleArray) {
+      doubleStats.updateStats(s);
+    }
+
+    doubleArray = new double[] {0.0001d, 9.9d, 3.1d};
+    DoubleStatistics doubleStats2 = new DoubleStatistics();
+
+    for (double s: doubleArray) {
+      doubleStats2.updateStats(s);
+    }
+    doubleStats.mergeStatistics(doubleStats2);
+    assertEquals(doubleStats.getMax(), 98.3d, 1e-10);
+    assertEquals(doubleStats.getMin(), 0.0001d, 1e-10);
+
+    doubleArray = new double[] {-1.91d, -100.9d, 100.54d};
+    DoubleStatistics doubleStats3 = new DoubleStatistics();
+    for (double s: doubleArray) {
+      doubleStats3.updateStats(s);
+    }
+    doubleStats.mergeStatistics(doubleStats3);
+
+    assertEquals(doubleStats.getMax(), 100.54d, 1e-10);
+    assertEquals(doubleStats.getMin(), -100.9d, 1e-10);
+  }
+
+  private void testMergingBooleanStats() {
+    booleanArray = new boolean[] {true, true, true};
+    BooleanStatistics booleanStats = new BooleanStatistics();
+
+    for (boolean s: booleanArray) {
+      booleanStats.updateStats(s);
+    }
+
+    booleanArray = new boolean[] {true, false};
+    BooleanStatistics booleanStats2 = new BooleanStatistics();
+
+    for (boolean s: booleanArray) {
+      booleanStats2.updateStats(s);
+    }
+    booleanStats.mergeStatistics(booleanStats2);
+    assertEquals(booleanStats.getMax(), true);
+    assertEquals(booleanStats.getMin(), false);
+
+    booleanArray = new boolean[] {false, false, false, false};
+    BooleanStatistics booleanStats3 = new BooleanStatistics();
+    for (boolean s: booleanArray) {
+      booleanStats3.updateStats(s);
+    }
+    booleanStats.mergeStatistics(booleanStats3);
+
+    assertEquals(booleanStats.getMax(), true);
+    assertEquals(booleanStats.getMin(), false);
+  }
+
+  private void testMergingStringStats() {
+    stringArray = new String[] {"hello", "world", "this", "is", "a", "test", "of", "the", "stats", "class"};
+    BinaryStatistics stats = new BinaryStatistics();
+
+    for (String s: stringArray) {
+      stats.updateStats(Binary.fromString(s));
+    }
+
+    stringArray = new String[] {"zzzz", "asdf", "testing"};
+    BinaryStatistics stats2 = new BinaryStatistics();
+
+    for (String s: stringArray) {
+      stats2.updateStats(Binary.fromString(s));
+    }
+    stats.mergeStatistics(stats2);
+    assertEquals(stats.getMax(), Binary.fromString("zzzz"));
+    assertEquals(stats.getMin(), Binary.fromString("a"));
+
+    stringArray = new String[] {"", "good", "testing"};
+    BinaryStatistics stats3 = new BinaryStatistics();
+    for (String s: stringArray) {
+      stats3.updateStats(Binary.fromString(s));
+    }
+    stats.mergeStatistics(stats3);
+
+    assertEquals(stats.getMax(), Binary.fromString("zzzz"));
+    assertEquals(stats.getMin(), Binary.fromString(""));
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/values/RandomStr.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/RandomStr.java b/parquet-column/src/test/java/org/apache/parquet/column/values/RandomStr.java
new file mode 100644
index 0000000..8b41c39
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/RandomStr.java
@@ -0,0 +1,56 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values;
+
+import java.util.Random;
+
+/**
+ * 
+ * @author Aniket Mokashi
+ *
+ */
+public class RandomStr {
+  private final char[] alphanumeric=alphanumeric();
+  private final Random rand;
+
+  public RandomStr(){this(null);}
+
+  public RandomStr(Random rand){
+    this.rand=(rand!=null) ? rand : new Random();
+  }
+
+  public String get(int len){
+    StringBuffer out=new StringBuffer();
+
+    while(out.length() < len){
+      int idx=Math.abs(( rand.nextInt() % alphanumeric.length ));
+      out.append(alphanumeric[idx]);
+    }
+    return out.toString();
+  }
+
+  // create alphanumeric char array
+  private char[] alphanumeric(){
+    StringBuffer buf=new StringBuffer(128);
+    for(int i=48; i<= 57;i++)buf.append((char)i); // 0-9
+    for(int i=65; i<= 90;i++)buf.append((char)i); // A-Z
+    for(int i=97; i<=122;i++)buf.append((char)i); // a-z
+    return buf.toString().toCharArray();
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/values/Utils.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/Utils.java b/parquet-column/src/test/java/org/apache/parquet/column/values/Utils.java
new file mode 100644
index 0000000..c9a62b4
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/Utils.java
@@ -0,0 +1,90 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.parquet.io.api.Binary;
+
+/**
+ * Test Utility class
+ * 
+ * @author Aniket Mokashi
+ *
+ */
+public class Utils {
+  private static Random randomLen = new Random();
+  private static RandomStr randomStr = new RandomStr(randomLen);
+  
+  public static String[] getRandomStringSamples(int numSamples, int maxLength) {
+    String[] samples = new String[numSamples];
+    
+    for (int i=0; i < numSamples; i++) {
+      int len = randomLen.nextInt(maxLength);
+      samples[i] = randomStr.get(len);
+    }
+    
+    return samples;
+  }
+  
+  public static void writeInts(ValuesWriter writer, int[] ints)
+      throws IOException {
+    for(int i=0; i < ints.length; i++) {
+      writer.writeInteger(ints[i]);
+    }
+  }
+
+  public static void writeData(ValuesWriter writer, String[] strings)
+      throws IOException {
+    for(int i=0; i < strings.length; i++) {
+      writer.writeBytes(Binary.fromString(strings[i]));
+    }
+  }
+
+  public static Binary[] readData(ValuesReader reader, byte[] data, int offset, int length)
+      throws IOException {
+    Binary[] bins = new Binary[length];
+    reader.initFromPage(length, data, 0);
+    for(int i=0; i < length; i++) {
+      bins[i] = reader.readBytes();
+    }
+    return bins;
+  }
+  
+  public static Binary[] readData(ValuesReader reader, byte[] data, int length)
+      throws IOException {
+    return readData(reader, data, 0, length);
+  }
+  
+  public static int[] readInts(ValuesReader reader, byte[] data, int offset, int length)
+      throws IOException {
+    int[] ints = new int[length];
+    reader.initFromPage(length, data, offset);
+    for(int i=0; i < length; i++) {
+      ints[i] = reader.readInteger();
+    }
+    return ints;
+  }
+  
+  public static int[] readInts(ValuesReader reader, byte[] data, int length)
+      throws IOException {
+    return readInts(reader, data, 0, length);
+  }
+}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/BitPackingPerfTest.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/BitPackingPerfTest.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/BitPackingPerfTest.java
new file mode 100644
index 0000000..e74e787
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/BitPackingPerfTest.java
@@ -0,0 +1,103 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bitpacking;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.parquet.column.values.ValuesReader;
+import org.apache.parquet.column.values.bitpacking.BitPacking.BitPackingWriter;
+
+/**
+ * Improvable micro benchmark for bitpacking
+ * run with: -verbose:gc -Xmx2g -Xms2g
+ * @author Julien Le Dem
+ *
+ */
+public class BitPackingPerfTest {
+
+  public static void main(String[] args) throws IOException {
+    int COUNT = 800000;
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    BitPackingWriter w = BitPacking.getBitPackingWriter(1, baos);
+    long t0 = System.currentTimeMillis();
+    for (int i = 0 ; i < COUNT; ++i) {
+      w.write(i % 2);
+    }
+    w.finish();
+    long t1 = System.currentTimeMillis();
+    System.out.println("written in " + (t1 - t0) + "ms");
+    System.out.println();
+    byte[] bytes = baos.toByteArray();
+    System.out.println(bytes.length);
+    int[] result = new int[COUNT];
+    for (int l = 0; l < 5; l++) {
+      long s = manual(bytes, result);
+      long b = generated(bytes, result);
+      float ratio = (float)b/s;
+      System.out.println("                                             " + ratio + (ratio < 1 ? " < 1 => GOOD" : " >= 1 => BAD"));
+    }
+  }
+
+  private static void verify(int[] result) {
+    int error = 0;
+    for (int i = 0 ; i < result.length; ++i) {
+      if (result[i] != i % 2) {
+        error ++;
+      }
+    }
+    if (error != 0) {
+      throw new RuntimeException("errors: " + error + " / " + result.length);
+    }
+  }
+
+  private static long manual(byte[] bytes, int[] result)
+      throws IOException {
+    return readNTimes(bytes, result, new BitPackingValuesReader(1));
+  }
+
+  private static long generated(byte[] bytes, int[] result)
+      throws IOException {
+    return readNTimes(bytes, result, new ByteBitPackingValuesReader(1, Packer.BIG_ENDIAN));
+  }
+
+  private static long readNTimes(byte[] bytes, int[] result, ValuesReader r)
+      throws IOException {
+    System.out.println();
+    long t = 0;
+    int N = 10;
+    System.gc();
+    System.out.print("                                             " + r.getClass().getSimpleName());
+    System.out.print(" no gc <");
+    for (int k = 0; k < N; k++) {
+      long t2 = System.nanoTime();
+      r.initFromPage(result.length, bytes, 0);
+      for (int i = 0; i < result.length; i++) {
+        result[i] = r.readInteger();
+      }
+      long t3 = System.nanoTime();
+      t += t3 - t2;
+    }
+    System.out.println("> read in " + t/1000 + "µs " + (N * result.length / (t / 1000)) + " values per µs");
+    verify(result);
+    return t;
+  }
+
+}
+


Mime
View raw message