parquet-commits mailing list archives

From b...@apache.org
Subject [29/51] [partial] parquet-mr git commit: PARQUET-23: Rename to org.apache.parquet.
Date Mon, 27 Apr 2015 23:12:26 GMT
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/FilteredRecordReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/FilteredRecordReader.java b/parquet-column/src/main/java/parquet/io/FilteredRecordReader.java
deleted file mode 100644
index a6d75ba..0000000
--- a/parquet-column/src/main/java/parquet/io/FilteredRecordReader.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import parquet.column.ColumnReader;
-import parquet.column.impl.ColumnReadStoreImpl;
-import parquet.filter.RecordFilter;
-import parquet.filter.UnboundRecordFilter;
-import parquet.io.api.RecordMaterializer;
-
-/**
- * Extends RecordReaderImplementation to skip records that do not match the filter.
- *
- * @author Jacob Metcalf
- */
-class FilteredRecordReader<T> extends RecordReaderImplementation<T> {
-
-  private final RecordFilter recordFilter;
-  private final long recordCount;
-  private long recordsRead = 0;
-
-  /**
-   * @param root          the root of the schema
-   * @param recordMaterializer responsible for materializing the records
-   * @param validating    whether we should validate against the schema
-   * @param columnStore   where to read the column data from
-   * @param unboundFilter filter for records; pass in NULL_FILTER to leave unfiltered
-   * @param recordCount   the number of records in the row group
-   */
-  public FilteredRecordReader(MessageColumnIO root, RecordMaterializer<T> recordMaterializer, boolean validating,
-                              ColumnReadStoreImpl columnStore, UnboundRecordFilter unboundFilter, long recordCount) {
-    super(root, recordMaterializer, validating, columnStore);
-    this.recordCount = recordCount;
-    if ( unboundFilter != null ) {
-      recordFilter = unboundFilter.bind(getColumnReaders());
-    } else {
-      recordFilter = null;
-    }
-  }
-
-  /**
-   * Override read() method to provide skip.
-   */
-  @Override
-  public T read() {
-    skipToMatch();
-    if (recordsRead == recordCount) {
-      return null;
-    }
-    ++ recordsRead;
-    return super.read();
-  }
-
-  // FilteredRecordReader skips forward itself; it never asks the layer above to do the skipping for it.
-  // This is different from how filtering is handled in the filter2 API.
-  @Override
-  public boolean shouldSkipCurrentRecord() {
-    return false;
-  }
-
-  /**
-   * Skips forward until the filter finds the first match, or until all
-   * records have been read.
-   */
-  private void skipToMatch() {
-    while (recordsRead < recordCount && !recordFilter.isMatch()) {
-      State currentState = getState(0);
-      do {
-        ColumnReader columnReader = currentState.column;
-
-        // skip the value if it is defined for this column
-        if (columnReader.getCurrentDefinitionLevel() >= currentState.maxDefinitionLevel) {
-          columnReader.skip();
-        }
-        columnReader.consume();
-
-        // Based on repetition level work out next state to go to
-        int nextR = currentState.maxRepetitionLevel == 0 ? 0 : columnReader.getCurrentRepetitionLevel();
-        currentState = currentState.getNextState(nextR);
-      } while (currentState != null);
-      ++ recordsRead;
-    }
-  }
-}

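Note: the FilteredRecordReader deleted above is driven by the old parquet.filter API. A minimal sketch of building the kind of UnboundRecordFilter it consumes (the column name "age" is invented for illustration):

import parquet.filter.ColumnPredicates;
import parquet.filter.ColumnRecordFilter;
import parquet.filter.UnboundRecordFilter;

public class OldFilterExample {
  // Keeps only records whose "age" column equals 40. The filter stays
  // "unbound" until the reader binds it to the actual ColumnReaders,
  // as FilteredRecordReader does in its constructor.
  public static UnboundRecordFilter ageEquals40() {
    return ColumnRecordFilter.column("age", ColumnPredicates.equalTo(40));
  }
}
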
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/GroupColumnIO.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/GroupColumnIO.java b/parquet-column/src/main/java/parquet/io/GroupColumnIO.java
deleted file mode 100644
index 2329d15..0000000
--- a/parquet-column/src/main/java/parquet/io/GroupColumnIO.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import static parquet.schema.Type.Repetition.REPEATED;
-import static parquet.schema.Type.Repetition.REQUIRED;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import parquet.Log;
-import parquet.schema.GroupType;
-
-/**
- * Group level of the IO structure
- *
- *
- * @author Julien Le Dem
- *
- */
-public class GroupColumnIO extends ColumnIO {
-  private static final Log LOG = Log.getLog(GroupColumnIO.class);
-
-  private final Map<String, ColumnIO> childrenByName = new HashMap<String, ColumnIO>();
-  private final List<ColumnIO> children = new ArrayList<ColumnIO>();
-  private int childrenSize = 0;
-
-  GroupColumnIO(GroupType groupType, GroupColumnIO parent, int index) {
-    super(groupType, parent, index);
-  }
-
-  void add(ColumnIO child) {
-    children.add(child);
-    childrenByName.put(child.getType().getName(), child);
-    ++ childrenSize;
-  }
-
-  @Override
-  void setLevels(int r, int d, String[] fieldPath, int[] indexFieldPath, List<ColumnIO> repetition, List<ColumnIO> path) {
-    super.setLevels(r, d, fieldPath, indexFieldPath, repetition, path);
-    for (ColumnIO child : this.children) {
-      String[] newFieldPath = Arrays.copyOf(fieldPath, fieldPath.length + 1);
-      int[] newIndexFieldPath = Arrays.copyOf(indexFieldPath, indexFieldPath.length + 1);
-      newFieldPath[fieldPath.length] = child.getType().getName();
-      newIndexFieldPath[indexFieldPath.length] = child.getIndex();
-      List<ColumnIO> newRepetition;
-      if (child.getType().isRepetition(REPEATED)) {
-        newRepetition = new ArrayList<ColumnIO>(repetition);
-        newRepetition.add(child);
-      } else {
-        newRepetition = repetition;
-      }
-      List<ColumnIO> newPath = new ArrayList<ColumnIO>(path);
-      newPath.add(child);
-      child.setLevels(
-          // the type repetition level increases whenever there's a possible repetition
-          child.getType().isRepetition(REPEATED) ? r + 1 : r,
-          // the type definition level increases whenever a field can be missing (not required)
-          !child.getType().isRepetition(REQUIRED) ? d + 1 : d,
-          newFieldPath,
-          newIndexFieldPath,
-          newRepetition,
-          newPath
-          );
-
-    }
-  }
-
-  @Override
-  List<String[]> getColumnNames() {
-    ArrayList<String[]> result = new ArrayList<String[]>();
-    for (ColumnIO c : children) {
-      result.addAll(c.getColumnNames());
-    }
-    return result;
-  }
-
-  PrimitiveColumnIO getLast() {
-    return children.get(children.size()-1).getLast();
-  }
-
-  PrimitiveColumnIO getFirst() {
-    return children.get(0).getFirst();
-  }
-
-  public ColumnIO getChild(String name) {
-    return childrenByName.get(name);
-  }
-
-  public ColumnIO getChild(int fieldIndex) {
-    try {
-      return children.get(fieldIndex);
-    } catch (IndexOutOfBoundsException e) {
-      throw new InvalidRecordException("could not get child " + fieldIndex + " from " + children, e);
-    }
-  }
-
-  public int getChildrenCount() {
-    return childrenSize;
-  }
-
-}

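Note: to make the level-assignment rules in setLevels() above concrete, here is a small sketch with an invented schema; the expected levels follow directly from the two comments in the loop (REPEATED increments r, anything not REQUIRED increments d):

import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class LevelsExample {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message Document {\n" +
        "  optional group links {\n" +
        "    repeated int64 backward;\n" +
        "  }\n" +
        "  required binary name;\n" +
        "}");
    // links.backward: r = 1 (backward is REPEATED),
    //                 d = 2 (links is OPTIONAL, backward is not REQUIRED)
    System.out.println(schema.getMaxRepetitionLevel(new String[] {"links", "backward"})); // 1
    System.out.println(schema.getMaxDefinitionLevel(new String[] {"links", "backward"})); // 2
    // name: r = 0, d = 0 (REQUIRED all the way down)
    System.out.println(schema.getMaxRepetitionLevel(new String[] {"name"})); // 0
  }
}
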
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/InvalidRecordException.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/InvalidRecordException.java b/parquet-column/src/main/java/parquet/io/InvalidRecordException.java
deleted file mode 100644
index 6423bc4..0000000
--- a/parquet-column/src/main/java/parquet/io/InvalidRecordException.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import parquet.ParquetRuntimeException;
-
-/**
- * thrown when an invalid record is encountered
- *
- * @author Julien Le Dem
- *
- */
-public class InvalidRecordException extends ParquetRuntimeException {
-  private static final long serialVersionUID = 1L;
-
-  public InvalidRecordException() {
-    super();
-  }
-
-  public InvalidRecordException(String message, Throwable cause) {
-    super(message, cause);
-  }
-
-  public InvalidRecordException(String message) {
-    super(message);
-  }
-
-  public InvalidRecordException(Throwable cause) {
-    super(cause);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/MessageColumnIO.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/MessageColumnIO.java b/parquet-column/src/main/java/parquet/io/MessageColumnIO.java
deleted file mode 100644
index 3c09b72..0000000
--- a/parquet-column/src/main/java/parquet/io/MessageColumnIO.java
+++ /dev/null
@@ -1,396 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.List;
-
-import parquet.Log;
-import parquet.column.ColumnWriteStore;
-import parquet.column.ColumnWriter;
-import parquet.column.impl.ColumnReadStoreImpl;
-import parquet.column.page.PageReadStore;
-import parquet.filter.UnboundRecordFilter;
-import parquet.filter2.compat.FilterCompat;
-import parquet.filter2.compat.FilterCompat.Filter;
-import parquet.filter2.compat.FilterCompat.FilterPredicateCompat;
-import parquet.filter2.compat.FilterCompat.NoOpFilter;
-import parquet.filter2.compat.FilterCompat.UnboundRecordFilterCompat;
-import parquet.filter2.compat.FilterCompat.Visitor;
-import parquet.filter2.predicate.FilterPredicate;
-import parquet.filter2.recordlevel.FilteringRecordMaterializer;
-import parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicate;
-import parquet.filter2.recordlevel.IncrementallyUpdatedFilterPredicateBuilder;
-import parquet.io.api.Binary;
-import parquet.io.api.RecordConsumer;
-import parquet.io.api.RecordMaterializer;
-import parquet.schema.MessageType;
-
-import static parquet.Preconditions.checkNotNull;
-
-/**
- * Message level of the IO structure
- *
- *
- * @author Julien Le Dem
- *
- */
-public class MessageColumnIO extends GroupColumnIO {
-  private static final Log logger = Log.getLog(MessageColumnIO.class);
-
-  private static final boolean DEBUG = Log.DEBUG;
-
-  private List<PrimitiveColumnIO> leaves;
-
-  private final boolean validating;
-
-  MessageColumnIO(MessageType messageType, boolean validating) {
-    super(messageType, null, 0);
-    this.validating = validating;
-  }
-
-  public List<String[]> getColumnNames() {
-    return super.getColumnNames();
-  }
-
-  public <T> RecordReader<T> getRecordReader(PageReadStore columns,
-                                             RecordMaterializer<T> recordMaterializer) {
-    return getRecordReader(columns, recordMaterializer, FilterCompat.NOOP);
-  }
-
-  /**
-   * @deprecated use {@link #getRecordReader(PageReadStore, RecordMaterializer, Filter)}
-   */
-  @Deprecated
-  public <T> RecordReader<T> getRecordReader(PageReadStore columns,
-                                             RecordMaterializer<T> recordMaterializer,
-                                             UnboundRecordFilter filter) {
-    return getRecordReader(columns, recordMaterializer, FilterCompat.get(filter));
-  }
-
-  public <T> RecordReader<T> getRecordReader(final PageReadStore columns,
-                                             final RecordMaterializer<T> recordMaterializer,
-                                             final Filter filter) {
-    checkNotNull(columns, "columns");
-    checkNotNull(recordMaterializer, "recordMaterializer");
-    checkNotNull(filter, "filter");
-
-    if (leaves.isEmpty()) {
-      return new EmptyRecordReader<T>(recordMaterializer);
-    }
-
-    return filter.accept(new Visitor<RecordReader<T>>() {
-      @Override
-      public RecordReader<T> visit(FilterPredicateCompat filterPredicateCompat) {
-
-        FilterPredicate predicate = filterPredicateCompat.getFilterPredicate();
-        IncrementallyUpdatedFilterPredicateBuilder builder = new IncrementallyUpdatedFilterPredicateBuilder();
-        IncrementallyUpdatedFilterPredicate streamingPredicate = builder.build(predicate);
-        RecordMaterializer<T> filteringRecordMaterializer = new FilteringRecordMaterializer<T>(
-            recordMaterializer,
-            leaves,
-            builder.getValueInspectorsByColumn(),
-            streamingPredicate);
-
-        return new RecordReaderImplementation<T>(
-            MessageColumnIO.this,
-            filteringRecordMaterializer,
-            validating,
-            new ColumnReadStoreImpl(columns, filteringRecordMaterializer.getRootConverter(), getType()));
-      }
-
-      @Override
-      public RecordReader<T> visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
-        return new FilteredRecordReader<T>(
-            MessageColumnIO.this,
-            recordMaterializer,
-            validating,
-            new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType()),
-            unboundRecordFilterCompat.getUnboundRecordFilter(),
-            columns.getRowCount()
-        );
-
-      }
-
-      @Override
-      public RecordReader<T> visit(NoOpFilter noOpFilter) {
-        return new RecordReaderImplementation<T>(
-            MessageColumnIO.this,
-            recordMaterializer,
-            validating,
-            new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType()));
-      }
-    });
-  }
-
-  private class MessageColumnIORecordConsumer extends RecordConsumer {
-    private ColumnIO currentColumnIO;
-    private int currentLevel = 0;
-
-    private class FieldsMarker {
-      private BitSet visitedIndexes = new BitSet();
-
-      @Override
-      public String toString() {
-        return "VisitedIndexes{" +
-                "visitedIndexes=" + visitedIndexes +
-                '}';
-      }
-
-      public void reset(int fieldsCount) {
-        this.visitedIndexes.clear(0, fieldsCount);
-      }
-
-      public void markWritten(int i) {
-        visitedIndexes.set(i);
-      }
-
-      public boolean isWritten(int i) {
-        return visitedIndexes.get(i);
-      }
-    }
-
-    //track at each level of depth, which fields are written, so nulls can be inserted for the unwritten fields
-    private final FieldsMarker[] fieldsWritten;
-    private final int[] r;
-    private final ColumnWriter[] columnWriter;
-    private final ColumnWriteStore columns;
-    private boolean emptyField = true;
-
-    public MessageColumnIORecordConsumer(ColumnWriteStore columns) {
-      this.columns = columns;
-      int maxDepth = 0;
-      this.columnWriter = new ColumnWriter[MessageColumnIO.this.getLeaves().size()];
-      for (PrimitiveColumnIO primitiveColumnIO : MessageColumnIO.this.getLeaves()) {
-        maxDepth = Math.max(maxDepth, primitiveColumnIO.getFieldPath().length);
-        columnWriter[primitiveColumnIO.getId()] = columns.getColumnWriter(primitiveColumnIO.getColumnDescriptor());
-      }
-
-      fieldsWritten = new FieldsMarker[maxDepth];
-      for (int i = 0; i < maxDepth; i++) {
-        fieldsWritten[i] = new FieldsMarker();
-      }
-      r = new int[maxDepth];
-    }
-
-    public void printState() {
-      log(currentLevel + ", " + fieldsWritten[currentLevel] + ": " + Arrays.toString(currentColumnIO.getFieldPath()) + " r:" + r[currentLevel]);
-      if (r[currentLevel] > currentColumnIO.getRepetitionLevel()) {
-        // sanity check
-        throw new InvalidRecordException(r[currentLevel] + "(r) > " + currentColumnIO.getRepetitionLevel() + " ( schema r)");
-      }
-    }
-
-    private void log(Object m) {
-      String indent = "";
-      for (int i = 0; i<currentLevel; ++i) {
-        indent += "  ";
-      }
-      logger.debug(indent + m);
-    }
-
-    @Override
-    public void startMessage() {
-      if (DEBUG) log("< MESSAGE START >");
-      currentColumnIO = MessageColumnIO.this;
-      r[0] = 0;
-      int numberOfFieldsToVisit = ((GroupColumnIO)currentColumnIO).getChildrenCount();
-      fieldsWritten[0].reset(numberOfFieldsToVisit);
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void endMessage() {
-      writeNullForMissingFieldsAtCurrentLevel();
-      columns.endRecord();
-      if (DEBUG) log("< MESSAGE END >");
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void startField(String field, int index) {
-      try {
-        if (DEBUG) log("startField(" + field + ", " + index + ")");
-        currentColumnIO = ((GroupColumnIO)currentColumnIO).getChild(index);
-        emptyField = true;
-        if (DEBUG) printState();
-      } catch (RuntimeException e) {
-        throw new ParquetEncodingException("error starting field " + field + " at " + index, e);
-      }
-    }
-
-    @Override
-    public void endField(String field, int index) {
-      if (DEBUG) log("endField(" + field + ", " + index + ")");
-      currentColumnIO = currentColumnIO.getParent();
-      if (emptyField) {
-        throw new ParquetEncodingException("empty fields are illegal, the field should be ommited completely instead");
-      }
-      fieldsWritten[currentLevel].markWritten(index);
-      r[currentLevel] = currentLevel == 0 ? 0 : r[currentLevel - 1];
-      if (DEBUG) printState();
-    }
-
-    private void writeNullForMissingFieldsAtCurrentLevel() {
-      int currentFieldsCount = ((GroupColumnIO)currentColumnIO).getChildrenCount();
-      for (int i = 0; i < currentFieldsCount; i++) {
-        if (!fieldsWritten[currentLevel].isWritten(i)) {
-          try {
-            ColumnIO undefinedField = ((GroupColumnIO)currentColumnIO).getChild(i);
-            int d = currentColumnIO.getDefinitionLevel();
-            if (DEBUG)
-              log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + d + ")");
-            writeNull(undefinedField, r[currentLevel], d);
-          } catch (RuntimeException e) {
-            throw new ParquetEncodingException("error while writing nulls for fields of indexes " + i + " . current index: " + fieldsWritten[currentLevel], e);
-          }
-        }
-      }
-    }
-
-    private void writeNull(ColumnIO undefinedField, int r, int d) {
-      if (undefinedField.getType().isPrimitive()) {
-        columnWriter[((PrimitiveColumnIO)undefinedField).getId()].writeNull(r, d);
-      } else {
-        GroupColumnIO groupColumnIO = (GroupColumnIO)undefinedField;
-        int childrenCount = groupColumnIO.getChildrenCount();
-        for (int i = 0; i < childrenCount; i++) {
-          writeNull(groupColumnIO.getChild(i), r, d);
-        }
-      }
-    }
-
-    private void setRepetitionLevel() {
-      r[currentLevel] = currentColumnIO.getRepetitionLevel();
-      if (DEBUG) log("r: " + r[currentLevel]);
-    }
-
-    @Override
-    public void startGroup() {
-      if (DEBUG) log("startGroup()");
-
-      ++ currentLevel;
-      r[currentLevel] = r[currentLevel - 1];
-
-      int fieldsCount = ((GroupColumnIO)currentColumnIO).getChildrenCount();
-      fieldsWritten[currentLevel].reset(fieldsCount);
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void endGroup() {
-      if (DEBUG) log("endGroup()");
-      emptyField = false;
-      writeNullForMissingFieldsAtCurrentLevel();
-      -- currentLevel;
-
-      setRepetitionLevel();
-      if (DEBUG) printState();
-    }
-
-    private ColumnWriter getColumnWriter() {
-      return columnWriter[((PrimitiveColumnIO)currentColumnIO).getId()];
-    }
-
-    @Override
-    public void addInteger(int value) {
-      if (DEBUG) log("addInt(" + value + ")");
-      emptyField = false;
-      getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel());
-
-      setRepetitionLevel();
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void addLong(long value) {
-      if (DEBUG) log("addLong(" + value + ")");
-      emptyField = false;
-      getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel());
-
-      setRepetitionLevel();
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void addBoolean(boolean value) {
-      if (DEBUG) log("addBoolean(" + value + ")");
-      emptyField = false;
-      getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel());
-
-      setRepetitionLevel();
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void addBinary(Binary value) {
-      if (DEBUG) log("addBinary(" + value.length() + " bytes)");
-      emptyField = false;
-      getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel());
-
-      setRepetitionLevel();
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void addFloat(float value) {
-      if (DEBUG) log("addFloat(" + value + ")");
-      emptyField = false;
-      getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel());
-
-      setRepetitionLevel();
-      if (DEBUG) printState();
-    }
-
-    @Override
-    public void addDouble(double value) {
-      if (DEBUG) log("addDouble(" + value + ")");
-      emptyField = false;
-      getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel());
-
-      setRepetitionLevel();
-      if (DEBUG) printState();
-    }
-
-  }
-
-  public RecordConsumer getRecordWriter(ColumnWriteStore columns) {
-    RecordConsumer recordWriter = new MessageColumnIORecordConsumer(columns);
-    if (DEBUG) recordWriter = new RecordConsumerLoggingWrapper(recordWriter);
-    return validating ? new ValidatingRecordConsumer(recordWriter, getType()) : recordWriter;
-  }
-
-  void setLevels() {
-    setLevels(0, 0, new String[0], new int[0], Arrays.<ColumnIO>asList(this), Arrays.<ColumnIO>asList(this));
-  }
-
-  void setLeaves(List<PrimitiveColumnIO> leaves) {
-    this.leaves = leaves;
-  }
-
-  public List<PrimitiveColumnIO> getLeaves() {
-    return this.leaves;
-  }
-
-  @Override
-  public MessageType getType() {
-    return (MessageType)super.getType();
-  }
-}
\ No newline at end of file

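Note: a minimal sketch of wiring MessageColumnIO.getRecordReader with the filter2 path shown above; columnIO, pages and materializer are assumed to come from the surrounding file reader, and the "age" column is invented:

import parquet.column.page.PageReadStore;
import parquet.filter2.compat.FilterCompat;
import parquet.filter2.predicate.FilterApi;
import parquet.filter2.predicate.FilterPredicate;
import parquet.io.MessageColumnIO;
import parquet.io.RecordReader;
import parquet.io.api.RecordMaterializer;

public class FilteredReadExample {
  public static <T> RecordReader<T> filteredReader(MessageColumnIO columnIO,
                                                   PageReadStore pages,
                                                   RecordMaterializer<T> materializer) {
    // visit(FilterPredicateCompat) above wraps the materializer in a
    // FilteringRecordMaterializer for this predicate
    FilterPredicate predicate = FilterApi.eq(FilterApi.intColumn("age"), 40);
    return columnIO.getRecordReader(pages, materializer, FilterCompat.get(predicate));
  }
}
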
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/ParquetDecodingException.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/ParquetDecodingException.java b/parquet-column/src/main/java/parquet/io/ParquetDecodingException.java
deleted file mode 100644
index 07a60dc..0000000
--- a/parquet-column/src/main/java/parquet/io/ParquetDecodingException.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import parquet.ParquetRuntimeException;
-
-/**
- * thrown when a decoding problem occurred
- *
- * @author Julien Le Dem
- *
- */
-public class ParquetDecodingException extends ParquetRuntimeException {
-  private static final long serialVersionUID = 1L;
-
-  public ParquetDecodingException() {
-  }
-
-  public ParquetDecodingException(String message, Throwable cause) {
-    super(message, cause);
-  }
-
-  public ParquetDecodingException(String message) {
-    super(message);
-  }
-
-  public ParquetDecodingException(Throwable cause) {
-    super(cause);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/ParquetEncodingException.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/ParquetEncodingException.java b/parquet-column/src/main/java/parquet/io/ParquetEncodingException.java
deleted file mode 100644
index 9dcb963..0000000
--- a/parquet-column/src/main/java/parquet/io/ParquetEncodingException.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import parquet.ParquetRuntimeException;
-
-/**
- * thrown when an encoding problem occurred
- *
- * @author Julien Le Dem
- *
- */
-public class ParquetEncodingException extends ParquetRuntimeException {
-  private static final long serialVersionUID = 1L;
-
-  public ParquetEncodingException() {
-  }
-
-  public ParquetEncodingException(String message, Throwable cause) {
-    super(message, cause);
-  }
-
-  public ParquetEncodingException(String message) {
-    super(message);
-  }
-
-  public ParquetEncodingException(Throwable cause) {
-    super(cause);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/PrimitiveColumnIO.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/PrimitiveColumnIO.java b/parquet-column/src/main/java/parquet/io/PrimitiveColumnIO.java
deleted file mode 100644
index b912309..0000000
--- a/parquet-column/src/main/java/parquet/io/PrimitiveColumnIO.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-
-import java.util.Arrays;
-import java.util.List;
-
-import parquet.column.ColumnDescriptor;
-import parquet.schema.Type;
-import parquet.schema.PrimitiveType;
-import parquet.schema.PrimitiveType.PrimitiveTypeName;
-
-
-/**
- * Primitive level of the IO structure
- *
- *
- * @author Julien Le Dem
- *
- */
-public class PrimitiveColumnIO extends ColumnIO {
-//  private static final Logger logger = Logger.getLogger(PrimitiveColumnIO.class.getName());
-
-  private ColumnIO[] path;
-  private ColumnDescriptor columnDescriptor;
-  private final int id;
-
-  PrimitiveColumnIO(Type type, GroupColumnIO parent, int index, int id) {
-    super(type, parent, index);
-    this.id = id;
-  }
-
-  @Override
-  void setLevels(int r, int d, String[] fieldPath, int[] fieldIndexPath, List<ColumnIO> repetition, List<ColumnIO> path) {
-    super.setLevels(r, d, fieldPath, fieldIndexPath, repetition, path);
-    PrimitiveType type = getType().asPrimitiveType();
-    this.columnDescriptor = new ColumnDescriptor(
-        fieldPath, 
-        type.getPrimitiveTypeName(),
-        type.getTypeLength(),
-        getRepetitionLevel(), 
-        getDefinitionLevel());
-    this.path = path.toArray(new ColumnIO[path.size()]);
-  }
-
-  @Override
-  List<String[]> getColumnNames() {
-    return Arrays.asList(new String[][] { getFieldPath() });
-  }
-
-  public ColumnDescriptor getColumnDescriptor() {
-    return columnDescriptor;
-  }
-
-  public ColumnIO[] getPath() {
-    return path;
-  }
-
-  public boolean isLast(int r) {
-    return getLast(r) == this;
-  }
-
-  private PrimitiveColumnIO getLast(int r) {
-    ColumnIO parent = getParent(r);
-
-    PrimitiveColumnIO last = parent.getLast();
-    return last;
-  }
-
-  @Override
-  PrimitiveColumnIO getLast() {
-    return this;
-  }
-
-  @Override
-  PrimitiveColumnIO getFirst() {
-    return this;
-  }
-
-  public boolean isFirst(int r) {
-    return getFirst(r) == this;
-  }
-
-  private PrimitiveColumnIO getFirst(int r) {
-    ColumnIO parent = getParent(r);
-    return parent.getFirst();
-  }
-
-  public PrimitiveTypeName getPrimitive() {
-    return getType().asPrimitiveType().getPrimitiveTypeName();
-  }
-
-  public int getId() {
-    return id;
-  }
-
-}

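Note: continuing the invented Document schema from the GroupColumnIO note above, the ColumnDescriptor that setLevels() builds for the links.backward leaf would carry these values:

import parquet.column.ColumnDescriptor;
import parquet.schema.PrimitiveType.PrimitiveTypeName;

public class DescriptorExample {
  public static void main(String[] args) {
    ColumnDescriptor descriptor = new ColumnDescriptor(
        new String[] {"links", "backward"}, // fieldPath
        PrimitiveTypeName.INT64,
        0,   // type length, only meaningful for fixed-width binary types
        1,   // max repetition level
        2);  // max definition level
    System.out.println(descriptor);
  }
}
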
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/RecordConsumerLoggingWrapper.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/RecordConsumerLoggingWrapper.java b/parquet-column/src/main/java/parquet/io/RecordConsumerLoggingWrapper.java
deleted file mode 100644
index aa47004..0000000
--- a/parquet-column/src/main/java/parquet/io/RecordConsumerLoggingWrapper.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import java.util.Arrays;
-import parquet.Log;
-import parquet.io.api.Binary;
-import parquet.io.api.RecordConsumer;
-
-/**
- * This class can be used to wrap an actual RecordConsumer and log all calls
- *
- * @author Julien Le Dem
- *
- */
-public class RecordConsumerLoggingWrapper extends RecordConsumer {
-    private static final Log logger = Log.getLog(RecordConsumerLoggingWrapper.class);
-    private static final boolean DEBUG = Log.DEBUG;
-
-    private final RecordConsumer delegate;
-
-    int indent = 0;
-
-    /**
-     * all calls are delegated to the wrapped delegate
-     * @param delegate the wrapped RecordConsumer
-     */
-    public RecordConsumerLoggingWrapper(RecordConsumer delegate) {
-      this.delegate = delegate;
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void startField(String field, int index) {
-      if (DEBUG) logOpen(field);
-      delegate.startField(field, index);
-    }
-
-    private void logOpen(String field) {
-      log("<"+field+">");
-    }
-
-    private String indent() {
-      StringBuilder result = new StringBuilder();
-      for (int i = 0; i < indent; i++) {
-        result.append("  ");
-      }
-      return result.toString();
-    }
-
-    private void log(Object value) {
-      logger.debug(indent() + value);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void startGroup() {
-      if (DEBUG) ++indent;
-      if (DEBUG) log("<!-- start group -->");
-      delegate.startGroup();
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void addInteger(int value) {
-      if (DEBUG) log(value);
-      delegate.addInteger(value);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void addLong(long value) {
-      if (DEBUG) log(value);
-      delegate.addLong(value);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void addBoolean(boolean value) {
-      if (DEBUG) log(value);
-      delegate.addBoolean(value);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void addBinary(Binary value) {
-      if (DEBUG) log(Arrays.toString(value.getBytes()));
-      delegate.addBinary(value);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void addFloat(float value) {
-      if (DEBUG) log(value);
-      delegate.addFloat(value);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void addDouble(double value) {
-      if (DEBUG) log(value);
-      delegate.addDouble(value);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void endGroup() {
-      if (DEBUG) log("<!-- end group -->");
-      if (DEBUG) --indent;
-      delegate.endGroup();
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void endField(String field, int index) {
-      if (DEBUG) logClose(field);
-      delegate.endField(field, index);
-    }
-
-    private void logClose(String field) {
-      log("</"+field+">");
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void startMessage() {
-      if (DEBUG) log("<!-- start message -->");
-      delegate.startMessage();
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    @Override
-    public void endMessage() {
-      delegate.endMessage();
-      if (DEBUG) log("<!-- end message -->");
-    }
-
-}

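Note: for a record such as {name: "foo", ids: [1, 2]} written through the wrapper deleted above, the debug trace would look roughly like this (addBinary prints the raw bytes; messages and groups appear as XML-style comments):

<!-- start message -->
<name>
[102, 111, 111]
</name>
<ids>
1
2
</ids>
<!-- end message -->
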
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/RecordReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/RecordReader.java b/parquet-column/src/main/java/parquet/io/RecordReader.java
deleted file mode 100644
index a0b9e99..0000000
--- a/parquet-column/src/main/java/parquet/io/RecordReader.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-
-/**
- * used to read reassembled records
- * @author Julien Le Dem
- *
- * @param <T> the type of the materialized record
- */
-public abstract class RecordReader<T> {
-
-  /**
-   * Reads one record and returns it.
-   * @return the materialized record
-   */
-  public abstract T read();
-
-  /**
-   * Returns whether the current record should be skipped (dropped).
-   * Will be called *after* read().
-   */
-  public boolean shouldSkipCurrentRecord() {
-    return false;
-  }
-}

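Note: the read()/shouldSkipCurrentRecord() contract above implies a driving loop like the following sketch (rowCount would come from PageReadStore.getRowCount() in practice):

import java.util.ArrayList;
import java.util.List;
import parquet.io.RecordReader;

public class ReadLoopExample {
  public static <T> List<T> readAll(RecordReader<T> reader, long rowCount) {
    List<T> records = new ArrayList<T>();
    for (long i = 0; i < rowCount; i++) {
      T record = reader.read();
      // shouldSkipCurrentRecord() is only meaningful after read()
      if (record != null && !reader.shouldSkipCurrentRecord()) {
        records.add(record);
      }
    }
    return records;
  }
}
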
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/RecordReaderImplementation.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/RecordReaderImplementation.java b/parquet-column/src/main/java/parquet/io/RecordReaderImplementation.java
deleted file mode 100644
index b83b056..0000000
--- a/parquet-column/src/main/java/parquet/io/RecordReaderImplementation.java
+++ /dev/null
@@ -1,473 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import parquet.Log;
-import parquet.column.ColumnReader;
-import parquet.column.impl.ColumnReadStoreImpl;
-import parquet.io.api.Converter;
-import parquet.io.api.GroupConverter;
-import parquet.io.api.PrimitiveConverter;
-import parquet.io.api.RecordConsumer;
-import parquet.io.api.RecordMaterializer;
-import parquet.schema.MessageType;
-import parquet.schema.PrimitiveType.PrimitiveTypeName;
-
-
-/**
- * used to read reassembled records
- * @author Julien Le Dem
- *
- * @param <T> the type of the materialized record
- */
-class RecordReaderImplementation<T> extends RecordReader<T> {
-  private static final Log LOG = Log.getLog(RecordReaderImplementation.class);
-
-  public static class Case {
-
-    private int id;
-    private final int startLevel;
-    private final int depth;
-    private final int nextLevel;
-    private final boolean goingUp;
-    private final boolean goingDown;
-    private final int nextState;
-    private final boolean defined;
-
-    public Case(int startLevel, int depth, int nextLevel, int nextState, boolean defined) {
-      this.startLevel = startLevel;
-      this.depth = depth;
-      this.nextLevel = nextLevel;
-      this.nextState = nextState;
-      this.defined = defined;
-      // means going up the tree (towards the leaves) of the record
-      // true if we need to open up groups in this case
-      goingUp = startLevel <= depth;
-      // means going down the tree (towards the root) of the record
-      // true if we need to close groups in this case
-      goingDown = depth + 1 > nextLevel;
-    }
-
-    public void setID(int id) {
-      this.id = id;
-    }
-
-    @Override
-    // this implementation is buggy but the simpler one below has duplicates.
-    // it still works but generates more code than necessary;
-    // a middle ground is necessary
-//    public int hashCode() {
-//      int hashCode = 0;
-//      if (goingUp) {
-//        hashCode += 1 * (1 + startLevel) + 2 * (1 + depth);
-//      }
-//      if (goingDown) {
-//        hashCode += 3 * (1 + depth) + 5 * (1 + nextLevel);
-//      }
-//      return hashCode;
-//    }
-
-    public int hashCode() {
-      int hashCode = 17;
-      hashCode += 31 * startLevel;
-      hashCode += 31 * depth;
-      hashCode += 31 * nextLevel;
-      hashCode += 31 * nextState;
-      hashCode += 31 * (defined ? 0 : 1);
-      return hashCode;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj instanceof Case) {
-        return equals((Case)obj);
-      }
-      return false;
-    }
-
-    // see comment for hashCode above
-//    public boolean equals(Case other) {
-//      if (goingUp && !other.goingUp || !goingUp && other.goingUp) {
-//        return false;
-//      }
-//      if (goingUp && other.goingUp && (startLevel != other.startLevel || depth != other.depth)) {
-//        return false;
-//      }
-//      if (goingDown && !other.goingDown || !goingDown && other.goingDown) {
-//        return false;
-//      }
-//      if (goingDown && other.goingDown && (depth != other.depth || nextLevel != other.nextLevel)) {
-//        return false;
-//      }
-//      return true;
-//    }
-
-    public boolean equals(Case other) {
-      return startLevel == other.startLevel
-          && depth == other.depth
-          && nextLevel == other.nextLevel
-          && nextState == other.nextState
-          && ((defined && other.defined) || (!defined && !other.defined));
-    }
-
-    public int getID() {
-      return id;
-    }
-
-    public int getStartLevel() {
-      return startLevel;
-    }
-
-    public int getDepth() {
-      return depth;
-    }
-
-    public int getNextLevel() {
-      return nextLevel;
-    }
-
-    public int getNextState() {
-      return nextState;
-    }
-
-    public boolean isGoingUp() {
-      return goingUp;
-    }
-
-    public boolean isGoingDown() {
-      return goingDown;
-    }
-
-    public boolean isDefined() {
-      return defined;
-    }
-
-    @Override
-    public String toString() {
-      return "Case " + startLevel + " -> " + depth + " -> " + nextLevel + "; goto sate_"+getNextState();
-    }
-
-  }
-
-  public static class State {
-
-    public final int id;
-    public final PrimitiveColumnIO primitiveColumnIO;
-    public final int maxDefinitionLevel;
-    public final int maxRepetitionLevel;
-    public final PrimitiveTypeName primitive;
-    public final ColumnReader column;
-    public final String[] fieldPath; // indexed by currentLevel
-    public final int[] indexFieldPath; // indexed by currentLevel
-    public final GroupConverter[] groupConverterPath;
-    public final PrimitiveConverter primitiveConverter;
-    public final String primitiveField;
-    public final int primitiveFieldIndex;
-    public final int[] nextLevel; //indexed by next r
-
-    private int[] definitionLevelToDepth; // indexed by current d
-    private State[] nextState; // indexed by next r
-    private Case[][][] caseLookup;
-    private List<Case> definedCases;
-    private List<Case> undefinedCases;
-
-    private State(int id, PrimitiveColumnIO primitiveColumnIO, ColumnReader column, int[] nextLevel, GroupConverter[] groupConverterPath, PrimitiveConverter primitiveConverter) {
-      this.id = id;
-      this.primitiveColumnIO = primitiveColumnIO;
-      this.maxDefinitionLevel = primitiveColumnIO.getDefinitionLevel();
-      this.maxRepetitionLevel = primitiveColumnIO.getRepetitionLevel();
-      this.column = column;
-      this.nextLevel = nextLevel;
-      this.groupConverterPath = groupConverterPath;
-      this.primitiveConverter = primitiveConverter;
-      this.primitive = primitiveColumnIO.getType().asPrimitiveType().getPrimitiveTypeName();
-      this.fieldPath = primitiveColumnIO.getFieldPath();
-      this.primitiveField = fieldPath[fieldPath.length - 1];
-      this.indexFieldPath = primitiveColumnIO.getIndexFieldPath();
-      this.primitiveFieldIndex = indexFieldPath[indexFieldPath.length - 1];
-    }
-
-    public int getDepth(int definitionLevel) {
-      return definitionLevelToDepth[definitionLevel];
-    }
-
-    public List<Case> getDefinedCases() {
-      return definedCases;
-    }
-
-    public List<Case> getUndefinedCases() {
-      return undefinedCases;
-    }
-
-    public Case getCase(int currentLevel, int d, int nextR) {
-      return caseLookup[currentLevel][d][nextR];
-    }
-
-    public State getNextState(int nextR) {
-      return nextState[nextR];
-    }
-  }
-
-  private final GroupConverter recordRootConverter;
-  private final RecordMaterializer<T> recordMaterializer;
-
-  private State[] states;
-  private ColumnReader[] columnReaders;
-
-  private boolean shouldSkipCurrentRecord = false;
-
-  /**
-   * @param root the root of the schema
-   * @param recordMaterializer responsible for materializing the records
-   * @param validating whether we should validate against the schema
-   * @param columnStore where to read the column data from
-   */
-  public RecordReaderImplementation(MessageColumnIO root, RecordMaterializer<T> recordMaterializer, boolean validating, ColumnReadStoreImpl columnStore) {
-    this.recordMaterializer = recordMaterializer;
-    this.recordRootConverter = recordMaterializer.getRootConverter(); // TODO: validator(wrap(recordMaterializer), validating, root.getType());
-    PrimitiveColumnIO[] leaves = root.getLeaves().toArray(new PrimitiveColumnIO[root.getLeaves().size()]);
-    columnReaders = new ColumnReader[leaves.length];
-    int[][] nextColumnIdxForRepLevel = new int[leaves.length][];
-    int[][] levelToClose = new int[leaves.length][];
-    GroupConverter[][] groupConverterPaths = new GroupConverter[leaves.length][];
-    PrimitiveConverter[] leafConverters = new PrimitiveConverter[leaves.length];
-    int[] firstIndexForLevel  = new int[256]; // "256 levels of nesting ought to be enough for anybody"
-    // build the automaton
-    for (int i = 0; i < leaves.length; i++) {
-      PrimitiveColumnIO leafColumnIO = leaves[i];
-      //generate converters along the path from root to leaf
-      final int[] indexFieldPath = leafColumnIO.getIndexFieldPath();
-      groupConverterPaths[i] = new GroupConverter[indexFieldPath.length - 1];
-      GroupConverter current = this.recordRootConverter;
-      for (int j = 0; j < indexFieldPath.length - 1; j++) {
-        current = current.getConverter(indexFieldPath[j]).asGroupConverter();
-        groupConverterPaths[i][j] = current;
-      }
-      leafConverters[i] = current.getConverter(indexFieldPath[indexFieldPath.length - 1]).asPrimitiveConverter();
-      columnReaders[i] = columnStore.getColumnReader(leafColumnIO.getColumnDescriptor());
-      int maxRepetitionLevel = leafColumnIO.getRepetitionLevel();
-      nextColumnIdxForRepLevel[i] = new int[maxRepetitionLevel+1];
-
-      levelToClose[i] = new int[maxRepetitionLevel+1]; //next level
-      for (int nextRepLevel = 0; nextRepLevel <= maxRepetitionLevel; ++nextRepLevel) {
-        // remember which is the first for this level
-        if (leafColumnIO.isFirst(nextRepLevel)) {
-          firstIndexForLevel[nextRepLevel] = i;
-        }
-        int nextColIdx;
-        //TODO: when we use nextColumnIdxForRepLevel, should we provide current rep level or the rep level for next item
-        // figure out automaton transition
-        if (nextRepLevel == 0) { // 0 always means jump to the next (the last one being a special case)
-          nextColIdx = i + 1;
-        } else if (leafColumnIO.isLast(nextRepLevel)) { // when we are at the last of the next repetition level we jump back to the first
-          nextColIdx = firstIndexForLevel[nextRepLevel];
-        } else { // otherwise we just go on to the next column.
-          nextColIdx = i + 1;
-        }
-        // figure out which level down the tree we need to go back
-        if (nextColIdx == leaves.length) { // reached the end of the record => close all levels
-          levelToClose[i][nextRepLevel] = 0;
-        } else if (leafColumnIO.isLast(nextRepLevel)) { // reached the end of this level => close the repetition level
-          ColumnIO parent = leafColumnIO.getParent(nextRepLevel);
-          levelToClose[i][nextRepLevel] = parent.getFieldPath().length - 1;
-        } else { // otherwise close until the next common parent
-          levelToClose[i][nextRepLevel] = getCommonParentLevel(
-              leafColumnIO.getFieldPath(),
-              leaves[nextColIdx].getFieldPath());
-        }
-        // sanity check: that would be a bug
-        if (levelToClose[i][nextRepLevel] > leaves[i].getFieldPath().length-1) {
-          throw new ParquetEncodingException(Arrays.toString(leaves[i].getFieldPath())+" -("+nextRepLevel+")-> "+levelToClose[i][nextRepLevel]);
-        }
-        nextColumnIdxForRepLevel[i][nextRepLevel] = nextColIdx;
-      }
-    }
-    states = new State[leaves.length];
-    for (int i = 0; i < leaves.length; i++) {
-      states[i] = new State(i, leaves[i], columnReaders[i], levelToClose[i], groupConverterPaths[i], leafConverters[i]);
-
-      int[] definitionLevelToDepth = new int[states[i].primitiveColumnIO.getDefinitionLevel() + 1];
-      // for each possible definition level, determine the depth at which to create groups
-      final ColumnIO[] path = states[i].primitiveColumnIO.getPath();
-      int depth = 0;
-      for (int d = 0; d < definitionLevelToDepth.length; ++d) {
-        while (depth < (states[i].fieldPath.length - 1)
-          && d >= path[depth + 1].getDefinitionLevel()
-          ) {
-          ++ depth;
-        }
-        definitionLevelToDepth[d] = depth - 1;
-      }
-      states[i].definitionLevelToDepth = definitionLevelToDepth;
-    }
-    for (int i = 0; i < leaves.length; i++) {
-      State state = states[i];
-      int[] nextStateIds = nextColumnIdxForRepLevel[i];
-      state.nextState = new State[nextStateIds.length];
-      for (int j = 0; j < nextStateIds.length; j++) {
-        state.nextState[j] = nextStateIds[j] == states.length ? null : states[nextStateIds[j]];
-      }
-    }
-    for (int i = 0; i < states.length; i++) {
-      State state = states[i];
-      final Map<Case, Case> definedCases = new HashMap<Case, Case>();
-      final Map<Case, Case> undefinedCases = new HashMap<Case, Case>();
-      Case[][][] caseLookup = new Case[state.fieldPath.length][][];
-      for (int currentLevel = 0; currentLevel < state.fieldPath.length; ++ currentLevel) {
-        caseLookup[currentLevel] = new Case[state.maxDefinitionLevel+1][];
-        for (int d = 0; d <= state.maxDefinitionLevel; ++ d) {
-          caseLookup[currentLevel][d] = new Case[state.maxRepetitionLevel+1];
-          for (int nextR = 0; nextR <= state.maxRepetitionLevel; ++ nextR) {
-            int caseStartLevel = currentLevel;
-            int caseDepth = Math.max(state.getDepth(d), caseStartLevel - 1);
-            int caseNextLevel = Math.min(state.nextLevel[nextR], caseDepth + 1);
-            Case currentCase = new Case(caseStartLevel, caseDepth, caseNextLevel, getNextReader(state.id, nextR), d == state.maxDefinitionLevel);
-            Map<Case, Case> cases = currentCase.isDefined() ? definedCases : undefinedCases;
-            if (!cases.containsKey(currentCase)) {
-              currentCase.setID(cases.size());
-              cases.put(currentCase, currentCase);
-            } else {
-              currentCase = cases.get(currentCase);
-            }
-            caseLookup[currentLevel][d][nextR] = currentCase;
-          }
-        }
-      }
-      state.caseLookup = caseLookup;
-      state.definedCases = new ArrayList<Case>(definedCases.values());
-      state.undefinedCases = new ArrayList<Case>(undefinedCases.values());
-      Comparator<Case> caseComparator = new Comparator<Case>() {
-        @Override
-        public int compare(Case o1, Case o2) {
-          return o1.id - o2.id;
-        }
-      };
-      Collections.sort(state.definedCases, caseComparator);
-      Collections.sort(state.undefinedCases, caseComparator);
-    }
-  }
-
-  //TODO: have those wrappers for a converter
-  private RecordConsumer validator(RecordConsumer recordConsumer, boolean validating, MessageType schema) {
-    return validating ? new ValidatingRecordConsumer(recordConsumer, schema) : recordConsumer;
-  }
-
-  private RecordConsumer wrap(RecordConsumer recordConsumer) {
-    if (Log.DEBUG) {
-      return new RecordConsumerLoggingWrapper(recordConsumer);
-    }
-    return recordConsumer;
-  }
-
-  /**
-   * @see parquet.io.RecordReader#read()
-   */
-  @Override
-  public T read() {
-    int currentLevel = 0;
-    recordRootConverter.start();
-    State currentState = states[0];
-    do {
-      ColumnReader columnReader = currentState.column;
-      int d = columnReader.getCurrentDefinitionLevel();
-      // creating needed nested groups until the current field (opening tags)
-      int depth = currentState.definitionLevelToDepth[d];
-      for (; currentLevel <= depth; ++currentLevel) {
-        currentState.groupConverterPath[currentLevel].start();
-      }
-      // currentLevel = depth + 1 at this point
-      // set the current value
-      if (d >= currentState.maxDefinitionLevel) {
-        // not null
-        columnReader.writeCurrentValueToConverter();
-      }
-      columnReader.consume();
-
-      int nextR = currentState.maxRepetitionLevel == 0 ? 0 : columnReader.getCurrentRepetitionLevel();
-      // level to go to close current groups
-      int next = currentState.nextLevel[nextR];
-      for (; currentLevel > next; currentLevel--) {
-        currentState.groupConverterPath[currentLevel - 1].end();
-      }
-
-      currentState = currentState.nextState[nextR];
-    } while (currentState != null);
-    recordRootConverter.end();
-    T record = recordMaterializer.getCurrentRecord();
-    shouldSkipCurrentRecord = record == null;
-    if (shouldSkipCurrentRecord) {
-      recordMaterializer.skipCurrentRecord();
-    }
-    return record;
-  }
-
-  @Override
-  public boolean shouldSkipCurrentRecord() {
-    return shouldSkipCurrentRecord;
-  }
-
-  private static void log(String string) {
-    LOG.debug(string);
-  }
-
-  int getNextReader(int current, int nextRepetitionLevel) {
-    State nextState = states[current].nextState[nextRepetitionLevel];
-    return nextState == null ? states.length : nextState.id;
-  }
-
-  int getNextLevel(int current, int nextRepetitionLevel) {
-    return states[current].nextLevel[nextRepetitionLevel];
-  }
-
-  private int getCommonParentLevel(String[] previous, String[] next) {
-    int i = 0;
-    while (i < Math.min(previous.length, next.length) && previous[i].equals(next[i])) {
-      ++i;
-    }
-    return i;
-  }
-
-  protected int getStateCount() {
-    return states.length;
-  }
-
-  protected State getState(int i) {
-    return states[i];
-  }
-
-  protected RecordMaterializer<T> getMaterializer() {
-    return recordMaterializer;
-  }
-
-  protected Converter getRecordConsumer() {
-    return recordRootConverter;
-  }
-
-  protected Iterable<ColumnReader> getColumnReaders() {
-    // Arrays.asList returns a fixed-size view of the array, so callers cannot
-    // add or remove readers (set() would still write through, however)
-    return Arrays.asList(columnReaders);
-  }
-}
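
The state machine above is driven from outside through read() and
shouldSkipCurrentRecord(). A minimal driver loop might look like the sketch
below; the names ReadLoopSketch, reader, total, and out are illustrative and
not part of this diff.

    import java.util.List;

    import parquet.io.RecordReader;

    class ReadLoopSketch {
      // Reads `total` records, keeping those the materializer did not skip.
      static <T> void readAll(RecordReader<T> reader, long total, List<T> out) {
        for (long i = 0; i < total; i++) {
          T record = reader.read();              // assembles one record from the columns
          if (reader.shouldSkipCurrentRecord()) {
            continue;                            // the materializer discarded this record
          }
          out.add(record);
        }
      }
    }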

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/ValidatingRecordConsumer.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/ValidatingRecordConsumer.java b/parquet-column/src/main/java/parquet/io/ValidatingRecordConsumer.java
deleted file mode 100644
index 876afef..0000000
--- a/parquet-column/src/main/java/parquet/io/ValidatingRecordConsumer.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io;
-
-import java.util.ArrayDeque;
-import java.util.Arrays;
-import java.util.Deque;
-
-import parquet.Log;
-import parquet.io.api.Binary;
-import parquet.io.api.RecordConsumer;
-import parquet.schema.MessageType;
-import parquet.schema.Type;
-import parquet.schema.PrimitiveType.PrimitiveTypeName;
-import parquet.schema.Type.Repetition;
-
-import static parquet.schema.PrimitiveType.PrimitiveTypeName.*;
-
-/**
- * Wraps a record consumer.
- * Validates each record written against the schema and passes the events down to the wrapped consumer.
- *
- * @author Julien Le Dem
- *
- */
-public class ValidatingRecordConsumer extends RecordConsumer {
-  private static final Log LOG = Log.getLog(ValidatingRecordConsumer.class);
-  private static final boolean DEBUG = Log.DEBUG;
-
-  private final RecordConsumer delegate;
-
-  private Deque<Type> types = new ArrayDeque<Type>();
-  private Deque<Integer> fields = new ArrayDeque<Integer>();
-  private Deque<Integer> previousField = new ArrayDeque<Integer>();
-  private Deque<Integer> fieldValueCount = new ArrayDeque<Integer>();
-
-  /**
-   *
-   * @param delegate the consumer to pass down the event to
-   * @param schema the schema to validate against
-   */
-  public ValidatingRecordConsumer(RecordConsumer delegate, MessageType schema) {
-    this.delegate = delegate;
-    this.types.push(schema);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void startMessage() {
-    previousField.push(-1);
-    delegate.startMessage();
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void endMessage() {
-    delegate.endMessage();
-    validateMissingFields(types.peek().asGroupType().getFieldCount());
-    previousField.pop();
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void startField(String field, int index) {
-    if (index <= previousField.peek()) {
-      throw new InvalidRecordException("fields must be added in order " + field + " index " + index + " is before previous field " + previousField.peek());
-    }
-    validateMissingFields(index);
-    fields.push(index);
-    fieldValueCount.push(0);
-    delegate.startField(field, index);
-  }
-
-  private void validateMissingFields(int index) {
-    for (int i = previousField.peek() + 1; i < index; i++) {
-      Type type = types.peek().asGroupType().getType(i);
-      if (type.isRepetition(Repetition.REQUIRED)) {
-        throw new InvalidRecordException("required field is missing " + type);
-      }
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void endField(String field, int index) {
-    delegate.endField(field, index);
-    fieldValueCount.pop();
-    previousField.push(fields.pop());
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void startGroup() {
-    previousField.push(-1);
-    types.push(types.peek().asGroupType().getType(fields.peek()));
-    delegate.startGroup();
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void endGroup() {
-    delegate.endGroup();
-    validateMissingFields(types.peek().asGroupType().getFieldCount());
-    types.pop();
-    previousField.pop();
-  }
-
-  private void validate(PrimitiveTypeName p) {
-    Type currentType = types.peek().asGroupType().getType(fields.peek());
-    int c = fieldValueCount.pop() + 1;
-    fieldValueCount.push(c);
-    if (DEBUG) LOG.debug("validate " + p + " for " + currentType.getName());
-    switch (currentType.getRepetition()) {
-      case OPTIONAL:
-      case REQUIRED:
-        if (c > 1) {
-          throw new InvalidRecordException("repeated value when the type is not repeated in " + currentType);
-        }
-        break;
-      case REPEATED:
-        break;
-      default:
-        throw new InvalidRecordException("unknown repetition " + currentType.getRepetition() + " in " + currentType);
-    }
-    if (!currentType.isPrimitive() || currentType.asPrimitiveType().getPrimitiveTypeName() != p) {
-      throw new InvalidRecordException("expected type " + p + " but got "+ currentType);
-    }
-  }
-
-  private void validate(PrimitiveTypeName... ptypes) {
-    Type currentType = types.peek().asGroupType().getType(fields.peek());
-    int c = fieldValueCount.pop() + 1;
-    fieldValueCount.push(c);
-    if (DEBUG) LOG.debug("validate " + Arrays.toString(ptypes) + " for " + currentType.getName());
-    switch (currentType.getRepetition()) {
-      case OPTIONAL:
-      case REQUIRED:
-        if (c > 1) {
-          throw new InvalidRecordException("repeated value when the type is not repeated in " + currentType);
-        }
-        break;
-      case REPEATED:
-        break;
-      default:
-        throw new InvalidRecordException("unknown repetition " + currentType.getRepetition() + " in " + currentType);
-    }
-    if (!currentType.isPrimitive()) {
-      throw new InvalidRecordException(
-          "expected type in " + Arrays.toString(ptypes) + " but got " + currentType);
-    }
-    for (PrimitiveTypeName p : ptypes) {
-      if (currentType.asPrimitiveType().getPrimitiveTypeName() == p) {
-        return; // type is valid
-      }
-    }
-    throw new InvalidRecordException(
-        "expected type in " + Arrays.toString(ptypes) + " but got " + currentType);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void addInteger(int value) {
-    validate(INT32);
-    delegate.addInteger(value);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void addLong(long value) {
-    validate(INT64);
-    delegate.addLong(value);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void addBoolean(boolean value) {
-    validate(BOOLEAN);
-    delegate.addBoolean(value);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void addBinary(Binary value) {
-    validate(BINARY, INT96, FIXED_LEN_BYTE_ARRAY);
-    delegate.addBinary(value);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void addFloat(float value) {
-    validate(FLOAT);
-    delegate.addFloat(value);
-  }
-
-  /**
-   * {@inheritDoc}
-   */
-  public void addDouble(double value) {
-    validate(DOUBLE);
-    delegate.addDouble(value);
-  }
-
-}
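
As a usage sketch: wrap any RecordConsumer and write events through the
wrapper; field order, repetition, and primitive types are checked as the
events flow past. The class name ValidationSketch and the variable
`underlying` are illustrative.

    import parquet.io.ValidatingRecordConsumer;
    import parquet.io.api.RecordConsumer;
    import parquet.schema.MessageType;
    import parquet.schema.MessageTypeParser;

    class ValidationSketch {
      static void writeOne(RecordConsumer underlying) {
        MessageType schema = MessageTypeParser.parseMessageType(
            "message m { required int32 id; optional binary name; }");
        RecordConsumer consumer = new ValidatingRecordConsumer(underlying, schema);
        consumer.startMessage();
        consumer.startField("id", 0);
        consumer.addInteger(42);          // validated against INT32
        consumer.endField("id", 0);
        consumer.endMessage();            // ok: "name" is optional and may be absent
      }
    }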

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/api/Binary.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/api/Binary.java b/parquet-column/src/main/java/parquet/io/api/Binary.java
deleted file mode 100644
index 7eabfac..0000000
--- a/parquet-column/src/main/java/parquet/io/api/Binary.java
+++ /dev/null
@@ -1,414 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io.api;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.ObjectStreamException;
-import java.io.OutputStream;
-import java.io.Serializable;
-import java.io.UnsupportedEncodingException;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-
-import parquet.bytes.BytesUtils;
-import parquet.io.ParquetEncodingException;
-
-import static parquet.bytes.BytesUtils.UTF8;
-
-abstract public class Binary implements Comparable<Binary>, Serializable {
-
-  // this isn't really something others should extend
-  private Binary() { }
-
-  public static final Binary EMPTY = fromByteArray(new byte[0]);
-
-  abstract public String toStringUsingUTF8();
-
-  abstract public int length();
-
-  abstract public void writeTo(OutputStream out) throws IOException;
-
-  abstract public void writeTo(DataOutput out) throws IOException;
-
-  abstract public byte[] getBytes();
-
-  abstract boolean equals(byte[] bytes, int offset, int length);
-
-  abstract boolean equals(Binary other);
-
-  abstract public int compareTo(Binary other);
-
-  abstract int compareTo(byte[] bytes, int offset, int length);
-
-  abstract public ByteBuffer toByteBuffer();
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj == null) {
-      return false;
-    }
-    if (obj instanceof Binary) {
-      return equals((Binary)obj);
-    }
-    return false;
-  }
-
-  @Override
-  public String toString() {
-    return "Binary{" + length() + " bytes, " + Arrays.toString(getBytes()) + "}";
-  }
-
-  private static class ByteArraySliceBackedBinary extends Binary {
-    private final byte[] value;
-    private final int offset;
-    private final int length;
-
-    public ByteArraySliceBackedBinary(byte[] value, int offset, int length) {
-      this.value = value;
-      this.offset = offset;
-      this.length = length;
-    }
-
-    @Override
-    public String toStringUsingUTF8() {
-      return UTF8.decode(ByteBuffer.wrap(value, offset, length)).toString();
-      // TODO: figure out why the following line was much slower
-      // rdb: new String(...) is slower because it instantiates a new Decoder,
-      //      while Charset#decode uses a thread-local decoder cache
-      // return new String(value, offset, length, BytesUtils.UTF8);
-    }
-
-    @Override
-    public int length() {
-      return length;
-    }
-
-    @Override
-    public void writeTo(OutputStream out) throws IOException {
-      out.write(value, offset, length);
-    }
-
-    @Override
-    public byte[] getBytes() {
-      return Arrays.copyOfRange(value, offset, offset + length);
-    }
-
-    @Override
-    public int hashCode() {
-      return Binary.hashCode(value, offset, length);
-    }
-
-    @Override
-    boolean equals(Binary other) {
-      return other.equals(value, offset, length);
-    }
-
-    @Override
-    boolean equals(byte[] other, int otherOffset, int otherLength) {
-      return Binary.equals(value, offset, length, other, otherOffset, otherLength);
-    }
-
-    @Override
-    public int compareTo(Binary other) {
-      return other.compareTo(value, offset, length);
-    }
-
-    @Override
-    int compareTo(byte[] other, int otherOffset, int otherLength) {
-      return Binary.compareTwoByteArrays(value, offset, length, other, otherOffset, otherLength);
-    }
-
-    @Override
-    public ByteBuffer toByteBuffer() {
-      return ByteBuffer.wrap(value, offset, length);
-    }
-
-    @Override
-    public void writeTo(DataOutput out) throws IOException {
-      out.write(value, offset, length);
-    }
-
-  }
-
-  private static class FromStringBinary extends ByteArrayBackedBinary {
-    public FromStringBinary(byte[] value) {
-      super(value);
-    }
-
-    @Override
-    public String toString() {
-      return "Binary{\"" + toStringUsingUTF8() + "\"}";
-    }
-  }
-
-  public static Binary fromByteArray(final byte[] value, final int offset, final int length) {
-    return new ByteArraySliceBackedBinary(value, offset, length);
-  }
-
-  private static class ByteArrayBackedBinary extends Binary {
-    private final byte[] value;
-
-    public ByteArrayBackedBinary(byte[] value) {
-      this.value = value;
-    }
-
-    @Override
-    public String toStringUsingUTF8() {
-      return UTF8.decode(ByteBuffer.wrap(value)).toString();
-    }
-
-    @Override
-    public int length() {
-      return value.length;
-    }
-
-    @Override
-    public void writeTo(OutputStream out) throws IOException {
-      out.write(value);
-    }
-
-    @Override
-    public byte[] getBytes() {
-      return value;
-    }
-
-    @Override
-    public int hashCode() {
-      return Binary.hashCode(value, 0, value.length);
-    }
-
-    @Override
-    boolean equals(Binary other) {
-      return other.equals(value, 0, value.length);
-    }
-
-    @Override
-    boolean equals(byte[] other, int otherOffset, int otherLength) {
-      return Binary.equals(value, 0, value.length, other, otherOffset, otherLength);
-    }
-
-    @Override
-    public int compareTo(Binary other) {
-      return other.compareTo(value, 0, value.length);
-    }
-
-    @Override
-    int compareTo(byte[] other, int otherOffset, int otherLength) {
-      return Binary.compareTwoByteArrays(value, 0, value.length, other, otherOffset, otherLength);
-    }
-
-    @Override
-    public ByteBuffer toByteBuffer() {
-      return ByteBuffer.wrap(value);
-    }
-
-    @Override
-    public void writeTo(DataOutput out) throws IOException {
-      out.write(value);
-    }
-
-  }
-
-  public static Binary fromByteArray(final byte[] value) {
-    return new ByteArrayBackedBinary(value);
-  }
-
-  private static class ByteBufferBackedBinary extends Binary {
-    private transient ByteBuffer value;
-
-    public ByteBufferBackedBinary(ByteBuffer value) {
-      this.value = value;
-    }
-
-    @Override
-    public String toStringUsingUTF8() {
-      return UTF8.decode(value).toString();
-    }
-
-    @Override
-    public int length() {
-      return value.remaining();
-    }
-
-    @Override
-    public void writeTo(OutputStream out) throws IOException {
-      // TODO: should not have to materialize those bytes
-      out.write(getBytes());
-    }
-
-    @Override
-    public byte[] getBytes() {
-      byte[] bytes = new byte[value.remaining()];
-
-      value.mark();
-      value.get(bytes).reset();
-      return bytes;
-    }
-
-    @Override
-    public int hashCode() {
-      if (value.hasArray()) {
-        return Binary.hashCode(value.array(), value.arrayOffset() + value.position(),
-            value.remaining());
-      }
-      byte[] bytes = getBytes();
-      return Binary.hashCode(bytes, 0, bytes.length);
-    }
-
-    @Override
-    boolean equals(Binary other) {
-      if (value.hasArray()) {
-        return other.equals(value.array(), value.arrayOffset() + value.position(),
-            value.remaining());
-      }
-      byte[] bytes = getBytes();
-      return other.equals(bytes, 0, bytes.length);
-    }
-
-    @Override
-    boolean equals(byte[] other, int otherOffset, int otherLength) {
-      if (value.hasArray()) {
-        return Binary.equals(value.array(), value.arrayOffset() + value.position(),
-            value.remaining(), other, otherOffset, otherLength);
-      }
-      byte[] bytes = getBytes();
-      return Binary.equals(bytes, 0, bytes.length, other, otherOffset, otherLength);
-    }
-
-    @Override
-    public int compareTo(Binary other) {
-      if (value.hasArray()) {
-        return other.compareTo(value.array(), value.arrayOffset() + value.position(),
-            value.remaining());
-      }
-      byte[] bytes = getBytes();
-      return other.compareTo(bytes, 0, bytes.length);
-    }
-
-    @Override
-    int compareTo(byte[] other, int otherOffset, int otherLength) {
-      if (value.hasArray()) {
-        return Binary.compareTwoByteArrays(value.array(), value.arrayOffset() + value.position(),
-            value.remaining(), other, otherOffset, otherLength);
-      }
-      byte[] bytes = getBytes();
-      return Binary.compareTwoByteArrays(bytes, 0, bytes.length, other, otherOffset, otherLength);
-    }
-
-    @Override
-    public ByteBuffer toByteBuffer() {
-      return value;
-    }
-
-    @Override
-    public void writeTo(DataOutput out) throws IOException {
-      // TODO: should not have to materialize those bytes
-      out.write(getBytes());
-    }
-
-    private void writeObject(java.io.ObjectOutputStream out) throws IOException {
-      byte[] bytes = getBytes();
-      out.writeInt(bytes.length);
-      out.write(bytes);
-    }
-
-    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
-      int length = in.readInt();
-      byte[] bytes = new byte[length];
-      in.readFully(bytes, 0, length);
-      this.value = ByteBuffer.wrap(bytes);
-    }
-
-    private void readObjectNoData() throws ObjectStreamException {
-      this.value = ByteBuffer.wrap(new byte[0]);
-    }
-
-  }
-
-  public static Binary fromByteBuffer(final ByteBuffer value) {
-    return new ByteBufferBackedBinary(value);
-  }
-
-  public static Binary fromString(final String value) {
-    try {
-      return new FromStringBinary(value.getBytes("UTF-8"));
-    } catch (UnsupportedEncodingException e) {
-      throw new ParquetEncodingException("UTF-8 not supported.", e);
-    }
-  }
-
-  /**
-   * Hashes a slice of the array, consistent with {@link Arrays#hashCode(byte[])}
-   * applied to the same bytes.
-   * @param array the backing array
-   * @param offset the start of the slice
-   * @param length the number of bytes in the slice
-   * @return the hash code of the slice
-   */
-  private static final int hashCode(byte[] array, int offset, int length) {
-    int result = 1;
-    for (int i = offset; i < offset + length; i++) {
-      byte b = array[i];
-      result = 31 * result + b;
-    }
-    return result;
-  }
-
-  /**
-   * Compares two array slices for equality, analogous to
-   * {@link Arrays#equals(byte[], byte[])}.
-   * @param array1 the first backing array
-   * @param offset1 the start of the first slice
-   * @param length1 the length of the first slice
-   * @param array2 the second backing array
-   * @param offset2 the start of the second slice
-   * @param length2 the length of the second slice
-   * @return true if the slices have equal length and contents
-   */
-  private static final boolean equals(byte[] array1, int offset1, int length1, byte[] array2, int offset2, int length2) {
-    if (array1 == null && array2 == null) return true;
-    if (array1 == null || array2 == null) return false;
-    if (length1 != length2) return false;
-    if (array1 == array2 && offset1 == offset2) return true;
-    for (int i = 0; i < length1; i++) {
-      if (array1[i + offset1] != array2[i + offset2]) {
-        return false;
-      }
-    }
-    return true;
-  }
-
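-  // Note the inverted sign convention below: a positive result means array1
-  // sorts before array2. The public compareTo(Binary) swaps its operands
-  // before delegating here, so the two inversions cancel and Binary ordering
-  // stays conventional.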
-  private static final int compareTwoByteArrays(byte[] array1, int offset1, int length1,
-                                                byte[] array2, int offset2, int length2) {
-    if (array1 == null && array2 == null) return 0;
-    if (array1 == array2 && offset1 == offset2 && length1 == length2) return 0;
-    int minLength = (length1 < length2) ? length1 : length2;
-    for (int i = 0; i < minLength; i++) {
-      if (array1[i + offset1] < array2[i + offset2]) {
-        return 1;
-      }
-      if (array1[i + offset1] > array2[i + offset2]) {
-        return -1;
-      }
-    }
-    // check remainder
-    if (length1 == length2) { return 0; }
-    else if (length1 < length2) { return 1;}
-    else { return -1; }
-  }
-}
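
A short sketch of the factory methods above. The slice-backed variant shares
the caller's array and only copies when getBytes() is called; BinarySketch
and its locals are illustrative names.

    import parquet.io.api.Binary;

    class BinarySketch {
      static void demo() {
        byte[] data = { 'h', 'e', 'l', 'l', 'o', '!' };
        Binary whole = Binary.fromByteArray(data);          // no copy
        Binary slice = Binary.fromByteArray(data, 0, 5);    // "hello", shares data
        Binary text  = Binary.fromString("hello");          // UTF-8 encoded copy

        assert slice.length() == 5;
        assert slice.equals(text);                          // byte-wise equality
        assert slice.compareTo(text) == 0;
        byte[] copy = slice.getBytes();                     // copies the 5-byte range
      }
    }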

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/api/Converter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/api/Converter.java b/parquet-column/src/main/java/parquet/io/api/Converter.java
deleted file mode 100644
index a2f499d..0000000
--- a/parquet-column/src/main/java/parquet/io/api/Converter.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io.api;
-
-/**
- * Represents a node in a tree of converters that materializes tuples.
- *
- * @author Julien Le Dem
- *
- */
-public abstract class Converter {
-
-  abstract public boolean isPrimitive();
-
-  public PrimitiveConverter asPrimitiveConverter() {
-    throw new ClassCastException("Expected instance of primitive converter but got \"" + getClass().getName() + "\"");
-  }
-
-  public GroupConverter asGroupConverter() {
-    throw new ClassCastException("Expected instance of group converter but got \"" + getClass().getName() + "\"");
-  }
-
-}
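
The two cast methods support the usual dispatch pattern when walking a
converter tree, sketched below; DispatchSketch and leafFor are illustrative
names.

    import parquet.io.api.Converter;
    import parquet.io.api.PrimitiveConverter;

    class DispatchSketch {
      // Returns the leaf converter reached by following a path of field indexes.
      static PrimitiveConverter leafFor(Converter root, int[] path) {
        Converter c = root;
        for (int fieldIndex : path) {
          c = c.asGroupConverter().getConverter(fieldIndex);  // throws if c is a leaf
        }
        return c.asPrimitiveConverter();                      // throws if not a leaf
      }
    }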

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/api/GroupConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/api/GroupConverter.java b/parquet-column/src/main/java/parquet/io/api/GroupConverter.java
deleted file mode 100644
index 7dd9cfd..0000000
--- a/parquet-column/src/main/java/parquet/io/api/GroupConverter.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io.api;
-
-
-/**
- * converter for group nodes
- *
- * @author Julien Le Dem
- *
- */
-abstract public class GroupConverter extends Converter {
-
-  @Override
-  public boolean isPrimitive() {
-    return false;
-  }
-
-  @Override
-  public GroupConverter asGroupConverter() {
-    return this;
-  }
-
-  /**
-   * called at initialization based on schema
-   * must consistently return the same object
-   * @param fieldIndex index of the field in this group
-   * @return the corresponding converter
-   */
-  abstract public Converter getConverter(int fieldIndex);
-
-  /** runtime calls  **/
-
-  /** called at the beginning of the group managed by this converter */
-  abstract public void start();
-
-  /**
-   * call at the end of the group
-   */
-  abstract public void end();
-
-}
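
A minimal implementing sketch for a single-field group; the class name and
the caller-supplied child converter are assumptions for illustration. Note
the contract above: getConverter must consistently return the same object.

    import parquet.io.api.Converter;
    import parquet.io.api.GroupConverter;

    class SingleFieldGroupConverter extends GroupConverter {
      private final Converter child;      // fixed, as the contract requires

      SingleFieldGroupConverter(Converter child) {
        this.child = child;
      }

      @Override
      public Converter getConverter(int fieldIndex) {
        return child;                     // fieldIndex is always 0 for this group
      }

      @Override
      public void start() { /* reset per-group state */ }

      @Override
      public void end() { /* materialize the completed group */ }
    }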

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/api/PrimitiveConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/api/PrimitiveConverter.java b/parquet-column/src/main/java/parquet/io/api/PrimitiveConverter.java
deleted file mode 100644
index 79ba03a..0000000
--- a/parquet-column/src/main/java/parquet/io/api/PrimitiveConverter.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io.api;
-
-import parquet.column.Dictionary;
-
-/**
- * converter for leaves of the schema
- *
- * @author Julien Le Dem
- *
- */
-abstract public class PrimitiveConverter extends Converter {
-
-  @Override
-  public boolean isPrimitive() {
-    return true;
-  }
-
-  @Override
-  public PrimitiveConverter asPrimitiveConverter() {
-    return this;
-  }
-
-  /**
-   * If this returns true, dictionary-based conversion will be attempted for dictionary-encoded data.
-   * @return true if dictionary conversion is supported
-   */
-  public boolean hasDictionarySupport() {
-    return false;
-  }
-
-  /**
-   * Set the dictionary to use if the data was encoded using dictionary encoding
-   * and the converter hasDictionarySupport().
-   * @param dictionary the dictionary to use for conversion
-   */
-  public void setDictionary(Dictionary dictionary) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-  /** runtime calls  **/
-
-  /**
-   * Adds a value by its id in the dictionary previously set with setDictionary().
-   * Used when the converter has dictionary support and the data was dictionary encoded.
-   * @param dictionaryId the id in the dictionary of the value to add
-   */
-  public void addValueFromDictionary(int dictionaryId) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-  /**
-   * @param value value to set
-   */
-  public void addBinary(Binary value) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-  /**
-   * @param value value to set
-   */
-  public void addBoolean(boolean value) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-  /**
-   * @param value value to set
-   */
-  public void addDouble(double value) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-  /**
-   * @param value value to set
-   */
-  public void addFloat(float value) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-  /**
-   * @param value value to set
-   */
-  public void addInt(int value) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-  /**
-   * @param value value to set
-   */
-  public void addLong(long value) {
-    throw new UnsupportedOperationException(getClass().getName());
-  }
-
-}
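
A sketch of a dictionary-aware leaf converter for INT32 columns. A common
pattern is to decode every dictionary entry once up front and index into the
cache afterwards; the class name and the cache are illustrative, and
Dictionary#getMaxId() and Dictionary#decodeToInt(int) are assumed from
parquet.column.Dictionary.

    import parquet.column.Dictionary;
    import parquet.io.api.PrimitiveConverter;

    class IntLeafConverter extends PrimitiveConverter {
      private int[] decoded;              // one decoded value per dictionary id
      private int current;

      @Override
      public boolean hasDictionarySupport() {
        return true;
      }

      @Override
      public void setDictionary(Dictionary dictionary) {
        decoded = new int[dictionary.getMaxId() + 1];
        for (int id = 0; id <= dictionary.getMaxId(); id++) {
          decoded[id] = dictionary.decodeToInt(id);
        }
      }

      @Override
      public void addValueFromDictionary(int dictionaryId) {
        current = decoded[dictionaryId];  // O(1) lookup, no repeated decoding
      }

      @Override
      public void addInt(int value) {
        current = value;
      }
    }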

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/io/api/RecordConsumer.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/io/api/RecordConsumer.java b/parquet-column/src/main/java/parquet/io/api/RecordConsumer.java
deleted file mode 100644
index ac431e6..0000000
--- a/parquet-column/src/main/java/parquet/io/api/RecordConsumer.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.io.api;
-
-
-/**
- *
- * Abstraction for writing records
- * It decouples the striping algorithm from the actual record model
- * example:
- * <pre>
- * startMessage()
- *  startField("A", 0)
- *   addValue(1)
- *   addValue(2)
- *  endField("A", 0)
- *  startField("B", 1)
- *   startGroup()
- *    startField("C", 0)
- *     addValue(3)
- *    endField("C", 0)
- *   endGroup()
- *  endField("B", 1)
- * endMessage()
- * </pre>
- *
- * would produce the following message:
- * <pre>
- * {
- *   A: [1, 2]
- *   B: {
- *     C: 3
- *   }
- * }
- * </pre>
- * @author Julien Le Dem
- *
- */
-abstract public class RecordConsumer {
-
-  /**
-   * start a new record
-   */
-  abstract public void startMessage();
-
-  /**
-   * end of a record
-   */
-  abstract public void endMessage();
-
-  /**
-   * start of a field in a group or message
-   * If the field is repeated, it is started only once; all of its values are added between the start and end calls.
-   * @param field name of the field
-   * @param index of the field in the group or message
-   */
-  abstract public void startField(String field, int index);
-
-  /**
-   * end of a field in a group or message
-   * @param field name of the field
-   * @param index of the field in the group or message
-   */
-  abstract public void endField(String field, int index);
-
-  /**
-   * start of a group in a field
-   */
-  abstract public void startGroup();
-
-  /**
-   * end of a group in a field
-   */
-  abstract public void endGroup();
-
-  /**
-   * add an int value in the current field
-   * @param value
-   */
-  abstract public void addInteger(int value);
-
-  /**
-   * add a long value in the current field
-   * @param value
-   */
-  abstract public void addLong(long value);
-
-  /**
-   * add a boolean value in the current field
-   * @param value
-   */
-  abstract public void addBoolean(boolean value);
-
-  /**
-   * add a binary value in the current field
-   * @param value
-   */
-  abstract public void addBinary(Binary value);
-
-  /**
-   * add a float value in the current field
-   * @param value
-   */
-  abstract public void addFloat(float value);
-
-  /**
-   * add a double value in the current field
-   * @param value
-   */
-  abstract public void addDouble(double value);
-
-}
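
The event sequence from the class comment, spelled with the concrete typed
methods (the comment's generic addValue corresponds to addInteger for int32
fields); `consumer` stands for any RecordConsumer implementation.

    consumer.startMessage();
    consumer.startField("A", 0);
    consumer.addInteger(1);
    consumer.addInteger(2);
    consumer.endField("A", 0);
    consumer.startField("B", 1);
    consumer.startGroup();
    consumer.startField("C", 0);
    consumer.addInteger(3);
    consumer.endField("C", 0);
    consumer.endGroup();
    consumer.endField("B", 1);
    consumer.endMessage();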

