hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject [5/7] hbase git commit: HBASE-19179 Remove hbase-prefix-tree
Date Sat, 04 Nov 2017 17:10:04 GMT
http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java
deleted file mode 100644
index 8ba8828..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java
+++ /dev/null
@@ -1,542 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.PrivateCellUtil;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowSectionWriter;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
-import org.apache.hadoop.hbase.io.CellOutputStream;
-import org.apache.hadoop.hbase.util.ArrayUtils;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
-import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
-import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet;
-import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
-import org.apache.hadoop.hbase.util.vint.UFIntTool;
-import org.apache.hadoop.io.WritableUtils;
-/**
- * This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are
- * added they are completely copied into the various encoding structures. This is important because
- * usually the cells being fed in during compactions will be transient.<br>
- * <br>
- * Usage:<br>
- * 1) constructor<br>
- * 2) append cells in sorted order: write(Cell cell)<br>
- * 3) flush()<br>
- * <br>
- * To encode another block with the same instance, call {@link #reset(OutputStream, boolean)}
- * before appending the next block's cells.
- */
-@InterfaceAudience.Private
-public class PrefixTreeEncoder implements CellOutputStream {
-
-  /**************** static ************************/
-
-  protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class);
-
-  //future-proof where HBase supports multiple families in a data block.
-  //NOTE: the misspelling ("MULITPLE") is part of the public name and is kept for compatibility.
-  public static final boolean MULITPLE_FAMILIES_POSSIBLE = false;
-
-  private static final boolean USE_HASH_COLUMN_SORTER = true;
-  private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256;
-  private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024;
-
-
-  /**************** fields *************************/
-
-  /** Number of times {@link #reset(OutputStream, boolean)} has run (including from the ctor). */
-  protected long numResets = 0L;
-
-  /** Destination of the encoded block; replaced on every reset. */
-  protected OutputStream outputStream;
-
-  /*
-   * Cannot change during a single block's encoding. If false, then substitute incoming Cell's
-   * mvccVersion with zero and write out the block as usual.
-   */
-  protected boolean includeMvccVersion;
-
-  /*
-   * reusable ByteRanges used for communicating with the sorters/compilers
-   */
-  protected ByteRange rowRange;
-  protected ByteRange familyRange;
-  protected ByteRange qualifierRange;
-  protected ByteRange tagsRange;
-
-  /*
-   * incoming Cell fields are copied into these arrays
-   */
-  protected long[] timestamps;
-  protected long[] mvccVersions;
-  protected byte[] typeBytes;
-  protected int[] valueOffsets;
-  protected int[] tagsOffsets;
-  protected byte[] values;
-  protected byte[] tags;
-
-  protected PrefixTreeBlockMeta blockMeta;
-
-  /*
-   * Sub-encoders for the simple long/byte fields of a Cell.  Add to these as each cell arrives and
-   * compile before flushing.
-   */
-  protected LongEncoder timestampEncoder;
-  protected LongEncoder mvccVersionEncoder;
-  protected CellTypeEncoder cellTypeEncoder;
-
-  /*
-   * Structures used for collecting families and qualifiers, de-duplicating them, and sorting them
-   * so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by
-   * comparing only with the previous row key, families and qualifiers can arrive in unsorted order
-   * in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them.
-   */
-  protected ByteRangeSet familyDeduplicator;
-  protected ByteRangeSet qualifierDeduplicator;
-  protected ByteRangeSet tagsDeduplicator;
-  /*
-   * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
-   * trie structure with nodes connected by memory pointers (not serializable yet).
-   */
-  protected Tokenizer rowTokenizer;
-  protected Tokenizer familyTokenizer;
-  protected Tokenizer qualifierTokenizer;
-  protected Tokenizer tagsTokenizer;
-
-  /*
-   * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
-   * all information to an output stream of bytes that can be stored on disk.
-   */
-  protected RowSectionWriter rowWriter;
-  protected ColumnSectionWriter familyWriter;
-  protected ColumnSectionWriter qualifierWriter;
-  protected ColumnSectionWriter tagsWriter;
-
-  /*
-   * Integers used for counting cells and bytes.  We keep track of the size of the Cells as if they
-   * were full KeyValues because some parts of HBase like to know the "unencoded size".
-   */
-  protected int totalCells = 0;
-  protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
-  protected int totalValueBytes = 0;
-  protected int totalTagBytes = 0;
-  protected int maxValueLength = 0;
-  protected int maxTagLength = 0;
-  protected int totalBytes = 0;//accumulated by compile() and the compileXxx() methods
-
-
-  /***************** construct ***********************/
-
-  /**
-   * Allocates the reusable buffers, de-duplicators, tokenizers and writers, then prepares the
-   * encoder for its first block via {@link #reset(OutputStream, boolean)}.
-   * <p>
-   * Note that {@link #initializeTagHelpers()} and {@link #reset(OutputStream, boolean)} are
-   * overridable and are invoked from this constructor.
-   * @param outputStream stream the encoded block is written to on {@link #flush()}
-   * @param includeMvccVersion whether each cell's sequence id is encoded (zero is used otherwise)
-   */
-  public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) {
-    // used during cell accumulation
-    this.blockMeta = new PrefixTreeBlockMeta();
-    this.rowRange = new SimpleMutableByteRange();
-    this.familyRange = new SimpleMutableByteRange();
-    this.qualifierRange = new SimpleMutableByteRange();
-    this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES];
-    this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES];
-    this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES];
-    this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES];
-    this.values = new byte[VALUE_BUFFER_INIT_SIZE];
-
-    // used during compilation
-    this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
-        : new ByteRangeTreeSet();
-    this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
-        : new ByteRangeTreeSet();
-    this.timestampEncoder = new LongEncoder();
-    this.mvccVersionEncoder = new LongEncoder();
-    this.cellTypeEncoder = new CellTypeEncoder();
-    this.rowTokenizer = new Tokenizer();
-    this.familyTokenizer = new Tokenizer();
-    this.qualifierTokenizer = new Tokenizer();
-    this.rowWriter = new RowSectionWriter();
-    this.familyWriter = new ColumnSectionWriter();
-    this.qualifierWriter = new ColumnSectionWriter();
-    initializeTagHelpers();
-
-    reset(outputStream, includeMvccVersion);
-  }
-
-  /**
-   * Clears all per-block state so this instance can encode a new block.
-   * @param outputStream stream the next block is written to on {@link #flush()}
-   * @param includeMvccVersion whether the next block encodes each cell's sequence id
-   */
-  public void reset(OutputStream outputStream, boolean includeMvccVersion) {
-    ++numResets;
-    this.includeMvccVersion = includeMvccVersion;
-    this.outputStream = outputStream;
-    valueOffsets[0] = 0;
-    familyDeduplicator.reset();
-    qualifierDeduplicator.reset();
-    tagsDeduplicator.reset();
-    tagsWriter.reset();
-    tagsTokenizer.reset();
-    rowTokenizer.reset();
-    timestampEncoder.reset();
-    mvccVersionEncoder.reset();
-    cellTypeEncoder.reset();
-    familyTokenizer.reset();
-    qualifierTokenizer.reset();
-    rowWriter.reset();
-    familyWriter.reset();
-    qualifierWriter.reset();
-
-    // every per-block counter starts over, including the tag counters
-    totalCells = 0;
-    totalUnencodedBytes = 0;
-    totalValueBytes = 0;
-    totalTagBytes = 0;
-    maxValueLength = 0;
-    maxTagLength = 0;
-    totalBytes = 0;
-  }
-
-  /**
-   * Creates the range, de-duplicator, tokenizer and writer used for cell tags. Called once from
-   * the constructor, before the first reset.
-   */
-  protected void initializeTagHelpers() {
-    this.tagsRange = new SimpleMutableByteRange();
-    this.tagsDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
-        : new ByteRangeTreeSet();
-    this.tagsTokenizer = new Tokenizer();
-    this.tagsWriter = new ColumnSectionWriter();
-  }
-
-  /**
-   * Check that the arrays used to hold cell fragments are large enough for the cell that is being
-   * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
-   * first few block encodings but should stabilize quickly.
-   */
-  protected void ensurePerCellCapacities() {
-    int currentCapacity = valueOffsets.length;
-    // valueOffsets is written at index totalCells + 1 for every cell, hence the headroom
-    int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe
-    if (neededCapacity < currentCapacity) {
-      return;
-    }
-
-    // pad by the needed capacity so the arrays grow geometrically
-    // (presumably growIfNecessary allocates neededCapacity + padding -- confirm in ArrayUtils)
-    int padding = neededCapacity;
-    timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding);
-    mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding);
-    typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding);
-    valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding);
-  }
-
-  /******************** CellOutputStream methods *************************/
-
-  /**
-   * Note: Unused until support is added to the scanner/heap
-   * <p>
-   * The following methods are optimized versions of write(Cell cell). The result should be
-   * identical, however the implementation may be able to execute them much more efficiently because
-   * it does not need to compare the unchanged fields with the previous cell's.
-   * <p>
-   * Consider the benefits during compaction when paired with a CellScanner that is also aware of
-   * row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells
-   * to the write(Cell cell) method.
-   * <p>
-   * The savings of skipping duplicate row detection are significant with long row keys. A
-   * DataBlockEncoder may store a row key once in combination with a count of how many cells are in
-   * the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment
-   * of the counter, and that is for every cell in the row.
-   */
-
-  /**
-   * Add a Cell to the output stream but repeat the previous row.
-   * <p>
-   * Apart from the row handling, this performs the same steps as {@link #write(Cell)}, including
-   * registering the cell's tags, so that every de-duplicator sees one entry per cell in insertion
-   * order.
-   * @param cell the cell to append; its row is assumed to equal the previously written row
-   */
-  //@Override
-  public void writeWithRepeatRow(Cell cell) {
-    ensurePerCellCapacities();//can we optimize away some of this?
-
-    //save a relatively expensive row comparison, incrementing the row's counter instead
-    rowTokenizer.incrementNumOccurrencesOfLatestValue();
-    addFamilyPart(cell);
-    addQualifierPart(cell);
-    addTagPart(cell);
-    addAfterRowFamilyQualifier(cell);
-  }
-
-
-  /**
-   * Copies every part of the cell into the encoder's accumulation structures. Cells must be
-   * appended in sorted order.
-   * @param cell the cell to append
-   */
-  @Override
-  public void write(Cell cell) {
-    ensurePerCellCapacities();
-
-    rowTokenizer.addSorted(PrivateCellUtil.fillRowRange(cell, rowRange));
-    addFamilyPart(cell);
-    addQualifierPart(cell);
-    addTagPart(cell);
-    addAfterRowFamilyQualifier(cell);
-  }
-
-
-  /** Points the reusable tags range at the cell's tags and adds it to the tag de-duplicator. */
-  private void addTagPart(Cell cell) {
-    PrivateCellUtil.fillTagRange(cell, tagsRange);
-    tagsDeduplicator.add(tagsRange);
-  }
-
-  /***************** internal add methods ************************/
-
-  /**
-   * Records the timestamp, mvcc version, type and value of the cell at index {@code totalCells},
-   * updates the byte counters and finally increments {@code totalCells}.
-   */
-  private void addAfterRowFamilyQualifier(Cell cell){
-    // timestamps
-    long timestamp = cell.getTimestamp();
-    timestamps[totalCells] = timestamp;
-    timestampEncoder.add(timestamp);
-
-    // memstore timestamps
-    if (includeMvccVersion) {
-      long sequenceId = cell.getSequenceId();
-      mvccVersions[totalCells] = sequenceId;
-      mvccVersionEncoder.add(sequenceId);
-      totalUnencodedBytes += WritableUtils.getVIntSize(sequenceId);
-    }else{
-      //must overwrite in case there was a previous version in this array slot
-      mvccVersions[totalCells] = 0L;
-      if(totalCells == 0){//only need to do this for the first cell added
-        mvccVersionEncoder.add(0L);
-      }
-      //totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled
-    }
-
-    // types
-    byte typeByte = cell.getTypeByte();
-    typeBytes[totalCells] = typeByte;
-    cellTypeEncoder.add(typeByte);
-
-    // values
-    int valueLength = cell.getValueLength();
-    totalValueBytes += valueLength;
-    // grow with headroom proportional to the bytes held so far when we run out of space
-    values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes);
-    CellUtil.copyValueTo(cell, values, valueOffsets[totalCells]);
-    if (valueLength > maxValueLength) {
-      maxValueLength = valueLength;
-    }
-    // the end offset of this value doubles as the start offset of the next one
-    valueOffsets[totalCells + 1] = totalValueBytes;
-
-    // general
-    totalUnencodedBytes += KeyValueUtil.length(cell);
-    ++totalCells;
-  }
-
-  /**
-   * Adds the cell's family to the family de-duplicator. While only one family per block is
-   * possible, only the first cell of the block is inspected.
-   */
-  private void addFamilyPart(Cell cell) {
-    if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) {
-      PrivateCellUtil.fillFamilyRange(cell, familyRange);
-      familyDeduplicator.add(familyRange);
-    }
-  }
-
-  /** Adds the cell's qualifier to the qualifier de-duplicator. */
-  private void addQualifierPart(Cell cell) {
-    PrivateCellUtil.fillQualifierRange(cell, qualifierRange);
-    qualifierDeduplicator.add(qualifierRange);
-  }
-
-
-  /****************** compiling/flushing ********************/
-
-  /**
-   * Expensive method.  The second half of the encoding work happens here.
-   *
-   * Take all the separate accumulated data structures and turn them into a single stream of bytes
-   * which is written to the outputStream. The underlying stream itself is neither flushed nor
-   * closed here.
-   * @throws IOException if writing to the output stream fails
-   */
-  @Override
-  public void flush() throws IOException {
-    compile();
-
-    // do the actual flushing to the output stream.  Order matters.
-    blockMeta.writeVariableBytesToOutputStream(outputStream);
-    rowWriter.writeBytes(outputStream);
-    familyWriter.writeBytes(outputStream);
-    qualifierWriter.writeBytes(outputStream);
-    tagsWriter.writeBytes(outputStream);
-    timestampEncoder.writeBytes(outputStream);
-    mvccVersionEncoder.writeBytes(outputStream);
-    //CellType bytes are in the row nodes.  there is no additional type section
-    outputStream.write(values, 0, totalValueBytes);
-  }
-
-  /**
-   * Now that all the cells have been added, do the work to reduce them to a series of byte[]
-   * fragments that are ready to be written to the output stream.
-   */
-  protected void compile(){
-    blockMeta.setNumKeyValueBytes(totalUnencodedBytes);
-    int lastValueOffset = valueOffsets[totalCells];
-    blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
-    blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
-    blockMeta.setNumValueBytes(totalValueBytes);
-    totalBytes += totalTagBytes + totalValueBytes;
-
-    //these compile methods will add to totalBytes
-    compileTypes();
-    compileMvccVersions();
-    compileTimestamps();
-    compileTags();
-    compileQualifiers();
-    compileFamilies();
-    compileRows();
-
-    // the meta size is calculated last, after the sections above have populated blockMeta
-    int numMetaBytes = blockMeta.calculateNumMetaBytes();
-    blockMeta.setNumMetaBytes(numMetaBytes);
-    totalBytes += numMetaBytes;
-  }
-
-  /**
-   * <p>
-   * The following "compile" methods do any intermediate work necessary to transform the cell
-   * fragments collected during the writing phase into structures that are ready to write to the
-   * outputStream.
-   * </p>
-   * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion.
-   */
-
-  /** Records in blockMeta whether all cells share one type and, if so, which type. */
-  protected void compileTypes() {
-    blockMeta.setAllSameType(cellTypeEncoder.areAllSameType());
-    if(cellTypeEncoder.areAllSameType()){
-      blockMeta.setAllTypes(cellTypeEncoder.getOnlyType());
-    }
-  }
-
-  /** Compiles the mvcc version encoder, publishes its fields to blockMeta and counts its bytes. */
-  protected void compileMvccVersions() {
-    mvccVersionEncoder.compile();
-    blockMeta.setMvccVersionFields(mvccVersionEncoder);
-    int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength();
-    totalBytes += numMvccVersionBytes;
-  }
-
-  /** Compiles the timestamp encoder, publishes its fields to blockMeta and counts its bytes. */
-  protected void compileTimestamps() {
-    timestampEncoder.compile();
-    blockMeta.setTimestampFields(timestampEncoder);
-    int numTimestampBytes = timestampEncoder.getOutputArrayLength();
-    totalBytes += numTimestampBytes;
-  }
-
-  /** Sorts the unique qualifiers, tokenizes them and compiles the qualifier section. */
-  protected void compileQualifiers() {
-    blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
-    qualifierDeduplicator.compile();
-    qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
-    qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, ColumnNodeType.QUALIFIER);
-    qualifierWriter.compile();
-    int numQualifierBytes = qualifierWriter.getNumBytes();
-    blockMeta.setNumQualifierBytes(numQualifierBytes);
-    totalBytes += numQualifierBytes;
-  }
-
-  /** Sorts the unique families, tokenizes them and compiles the family section. */
-  protected void compileFamilies() {
-    blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
-    familyDeduplicator.compile();
-    familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
-    familyWriter.reconstruct(blockMeta, familyTokenizer, ColumnNodeType.FAMILY);
-    familyWriter.compile();
-    int numFamilyBytes = familyWriter.getNumBytes();
-    blockMeta.setNumFamilyBytes(numFamilyBytes);
-    totalBytes += numFamilyBytes;
-  }
-
-  /** Sorts the unique tag arrays, tokenizes them and compiles the tags section. */
-  protected void compileTags() {
-    blockMeta.setNumUniqueTags(tagsDeduplicator.size());
-    tagsDeduplicator.compile();
-    tagsTokenizer.addAll(tagsDeduplicator.getSortedRanges());
-    tagsWriter.reconstruct(blockMeta, tagsTokenizer, ColumnNodeType.TAGS);
-    tagsWriter.compile();
-    int numTagBytes = tagsWriter.getNumBytes();
-    blockMeta.setNumTagsBytes(numTagBytes);
-    totalBytes += numTagBytes;
-  }
-
-  /**
-   * Compiles the row section. The row writer is handed this encoder so it can read the other
-   * accumulated structures; it is invoked last among the compile steps in {@link #compile()}.
-   */
-  protected void compileRows() {
-    rowWriter.reconstruct(this);
-    rowWriter.compile();
-    int numRowBytes = rowWriter.getNumBytes();
-    blockMeta.setNumRowBytes(numRowBytes);
-    blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth());
-    totalBytes += numRowBytes;
-  }
-
-  /********************* convenience getters ********************************/
-
-  /**
-   * @param index insertion index of a cell
-   * @return offset of that cell's value within the value buffer
-   */
-  public long getValueOffset(int index) {
-    return valueOffsets[index];
-  }
-
-  /**
-   * @param index insertion index of a cell
-   * @return length of that cell's value, derived from two adjacent value offsets
-   */
-  public int getValueLength(int index) {
-    return valueOffsets[index + 1] - valueOffsets[index];
-  }
-
-  /************************* get/set *************************************/
-
-  public PrefixTreeBlockMeta getBlockMeta() {
-    return blockMeta;
-  }
-
-  public Tokenizer getRowTokenizer() {
-    return rowTokenizer;
-  }
-
-  public LongEncoder getTimestampEncoder() {
-    return timestampEncoder;
-  }
-
-  /** @return the byte total accumulated by {@link #compile()}; only meaningful after it ran */
-  public int getTotalBytes() {
-    return totalBytes;
-  }
-
-  /** @return the internal timestamp array (not a copy); only the first totalCells slots are used */
-  public long[] getTimestamps() {
-    return timestamps;
-  }
-
-  /** @return the internal mvcc version array (not a copy) */
-  public long[] getMvccVersions() {
-    return mvccVersions;
-  }
-
-  /** @return the internal cell type array (not a copy) */
-  public byte[] getTypeBytes() {
-    return typeBytes;
-  }
-
-  public LongEncoder getMvccVersionEncoder() {
-    return mvccVersionEncoder;
-  }
-
-  public ByteRangeSet getFamilySorter() {
-    return familyDeduplicator;
-  }
-
-  public ByteRangeSet getQualifierSorter() {
-    return qualifierDeduplicator;
-  }
-
-  public ByteRangeSet getTagSorter() {
-    return tagsDeduplicator;
-  }
-
-  public ColumnSectionWriter getFamilyWriter() {
-    return familyWriter;
-  }
-
-  public ColumnSectionWriter getQualifierWriter() {
-    return qualifierWriter;
-  }
-
-  public ColumnSectionWriter getTagWriter() {
-    return tagsWriter;
-  }
-
-  public RowSectionWriter getRowWriter() {
-    return rowWriter;
-  }
-
-  /** @return a new range over the used portion of the value buffer (shares the backing array) */
-  public ByteRange getValueByteRange() {
-    return new SimpleMutableByteRange(values, 0, totalValueBytes);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java
deleted file mode 100644
index 13c9c9d..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Strings;
-import org.apache.hadoop.hbase.util.vint.UFIntTool;
-import org.apache.hadoop.hbase.util.vint.UVIntTool;
-
-/**
- * <p>
- * Serializes one node of a column trie. The node type passed to the constructor selects which
- * offset width is read from the block meta: family, qualifier, or (for any other type) tags.
- * </p>
- * Each node is written as 3 consecutive parts:<br>
- * <ul>
- * <li>tokenLength: UVInt holding the number of token bytes
- * <li>token[]: the actual token bytes
- * <li>parentStartPosition: fixed-width UFInt holding the offset that was handed to
- * {@link #setParentStartPosition(int)}
- * </ul>
- */
-@InterfaceAudience.Private
-public class ColumnNodeWriter{
-
-  /************* fields ****************************/
-
-  protected TokenizerNode builderNode;
-  protected PrefixTreeBlockMeta blockMeta;
-
-  protected int tokenLength;
-  protected byte[] token;
-  protected int parentStartPosition;
-  protected ColumnNodeType nodeType;
-
-
-  /*************** construct **************************/
-
-  /**
-   * Binds this writer to a trie node and allocates a token buffer of the node's token length.
-   * The token bytes themselves are copied later by {@link #setTokenBytes(ByteRange)}.
-   * @param blockMeta source of the offset width used when writing
-   * @param builderNode the trie node this writer serializes
-   * @param nodeType which column section the node belongs to
-   */
-  public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
-      ColumnNodeType nodeType) {
-    this.blockMeta = blockMeta;
-    this.builderNode = builderNode;
-    this.nodeType = nodeType;
-    this.tokenLength = builderNode.getTokenLength();
-    this.token = new byte[this.tokenLength];
-  }
-
-
-  /************* methods *******************************/
-
-  /**
-   * @return true when the stored parent start position is zero
-   */
-  // NOTE(review): a non-root node whose parent sits at offset 0 would also report true here
-  // -- confirm callers do not rely on this to distinguish the two cases.
-  public boolean isRoot() {
-    return 0 == parentStartPosition;
-  }
-
-  /**
-   * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a
-   * placeholder.
-   * @param offsetWidthPlaceholder the placeholder
-   * @return node width
-   */
-  public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) {
-    final int lengthPrefixWidth = UVIntTool.numBytes(tokenLength);
-    return lengthPrefixWidth + token.length + offsetWidthPlaceholder;
-  }
-
-  /**
-   * Writes the token length, the token bytes and the parent start position, in that order.
-   * @param os destination stream
-   * @throws IOException if the stream rejects a write
-   */
-  public void writeBytes(OutputStream os) throws IOException {
-    // resolve the width first so nothing is written if the meta lookup fails
-    final int offsetWidth = lookupParentOffsetWidth();
-    UVIntTool.writeBytes(tokenLength, os);
-    os.write(token);
-    UFIntTool.writeBytes(offsetWidth, parentStartPosition, os);
-  }
-
-  /** Picks the offset width in blockMeta that matches this node's type (tags by default). */
-  private int lookupParentOffsetWidth() {
-    if (this.nodeType == ColumnNodeType.FAMILY) {
-      return blockMeta.getFamilyOffsetWidth();
-    }
-    if (this.nodeType == ColumnNodeType.QUALIFIER) {
-      return blockMeta.getQualifierOffsetWidth();
-    }
-    return blockMeta.getTagsOffsetWidth();
-  }
-
-  /**
-   * Copies the first {@code tokenLength} bytes of the given range into this writer's token buffer.
-   * @param source range holding the token bytes
-   */
-  public void setTokenBytes(ByteRange source) {
-    source.deepCopySubRangeTo(0, tokenLength, token, 0);
-  }
-
-
-  /****************** standard methods ************************/
-
-  /**
-   * @return {@code <node offset padded to 3 chars>,[<token>]-><parent start position>}
-   */
-  @Override
-  public String toString() {
-    String paddedOffset =
-        Strings.padFront(String.valueOf(builderNode.getOutputArrayOffset()), ' ', 3);
-    return new StringBuilder()
-        .append(paddedOffset)
-        .append(",")
-        .append("[")
-        .append(Bytes.toString(token))
-        .append("]->")
-        .append(parentStartPosition)
-        .toString();
-  }
-
-
-  /************************** get/set ***********************/
-
-  public void setParentStartPosition(int parentStartPosition) {
-    this.parentStartPosition = parentStartPosition;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java
deleted file mode 100644
index 986bc06..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hadoop.hbase.util.vint.UFIntTool;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-/**
- * <p>
- * Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family
- * section is written after the row section, and qualifier section after family section. The same
- * writer is also used for the tags section.
- * </p>
- * The family and qualifier tries, or "column tries", are structured differently than the row trie.
- * The trie cannot be reassembled without external data about the offsets of the leaf nodes, and
- * these external pointers are stored in the nubs and leaves of the row trie. For each cell in a
- * row, the row trie contains a list of offsets into the column sections (along with pointers to
- * timestamps and other per-cell fields). These offsets point to the last column node/token that
- * comprises the column name. To assemble the column name, the trie is traversed in reverse (right
- * to left), with the rightmost tokens pointing to the start of their "parent" node which is the
- * node to the left.
- * <p>
- * This choice was made to reduce the size of the column trie by storing the minimum amount of
- * offset data. As a result, to find a specific qualifier within a row, you must do a binary search
- * of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might
- * encode the columns in both a forward and reverse trie, which would convert binary searches into
- * more efficient trie searches which would be beneficial for wide rows.
- * </p>
- */
-@InterfaceAudience.Private
-public class ColumnSectionWriter {
-
-  public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
-
-  /****************** fields ****************************/
-
-  private PrefixTreeBlockMeta blockMeta;
-
-  private ColumnNodeType nodeType;
-  private Tokenizer tokenizer;
-  private int numBytes = 0;
-  private ArrayList<TokenizerNode> nonLeaves;
-  private ArrayList<TokenizerNode> leaves;
-  private ArrayList<TokenizerNode> allNodes;
-  private ArrayList<ColumnNodeWriter> columnNodeWriters;
-  private List<Integer> outputArrayOffsets;
-
-
-  /*********************** construct *********************/
-
-  /**
-   * Creates an empty writer. All collections start out empty (never null), so
-   * {@link #writeBytes(OutputStream)} and the getters are safe to call before the first
-   * {@link #compile()}.
-   */
-  public ColumnSectionWriter() {
-    this.nonLeaves = Lists.newArrayList();
-    this.leaves = Lists.newArrayList();
-    this.allNodes = Lists.newArrayList();
-    this.columnNodeWriters = Lists.newArrayList();
-    this.outputArrayOffsets = Lists.newArrayList();
-  }
-
-  /**
-   * Creates a writer that is already bound to the given meta, tokenizer and node type.
-   */
-  public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
-      ColumnNodeType nodeType) {
-    this();// init collections
-    reconstruct(blockMeta, builder, nodeType);
-  }
-
-  /**
-   * Binds this writer to the inputs of the next compilation. Does not clear previously compiled
-   * state; see {@link #reset()} for that.
-   * @param blockMeta receives the max element length and offset width computed by compile()
-   * @param builder tokenizer holding the column trie to flatten
-   * @param nodeType which column section (family, qualifier, or otherwise tags) is being written
-   */
-  public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
-      ColumnNodeType nodeType) {
-    this.blockMeta = blockMeta;
-    this.tokenizer = builder;
-    this.nodeType = nodeType;
-  }
-
-  /**
-   * Discards everything produced by the previous compilation so that stale node writers can
-   * neither be written out nor observed through the getters.
-   */
-  public void reset() {
-    numBytes = 0;
-    nonLeaves.clear();
-    leaves.clear();
-    // replace rather than clear: lists handed out by earlier getter calls stay untouched
-    allNodes = Lists.newArrayList();
-    columnNodeWriters = Lists.newArrayList();
-    outputArrayOffsets.clear();
-  }
-
-
-  /****************** methods *******************************/
-
-  /**
-   * Publishes the max element length to blockMeta (qualifiers and tags only) and then lays out
-   * all nodes of the trie.
-   * @return this writer, for chaining
-   */
-  public ColumnSectionWriter compile() {
-    if (this.nodeType == ColumnNodeType.FAMILY) {
-      // do nothing. max family length fixed at Byte.MAX_VALUE
-    } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
-      blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
-    } else {
-      blockMeta.setMaxTagsLength(tokenizer.getMaxElementLength());
-    }
-    compilerInternals();
-    return this;
-  }
-
-  /**
-   * Collects the trie's nodes (non-leaves first, then leaves), creates one node writer per node,
-   * finds the smallest offset width that can address the whole section, and assigns each node its
-   * output offset and its parent's offset.
-   */
-  protected void compilerInternals() {
-    tokenizer.setNodeFirstInsertionIndexes();
-    tokenizer.appendNodes(nonLeaves, true, false);
-
-    tokenizer.appendNodes(leaves, false, true);
-
-    allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
-    allNodes.addAll(nonLeaves);
-    allNodes.addAll(leaves);
-
-    columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
-    for (TokenizerNode node : allNodes) {
-      columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
-    }
-
-    // everything except the parent offsets has a known width at this point, so add it up
-    int totalBytesWithoutOffsets = 0;
-    for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
-      totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
-    }
-
-    // figure out how wide our offset FInts are: widen until the section size is addressable
-    int parentOffsetWidth = 0;
-    while (true) {
-      ++parentOffsetWidth;
-      int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
-      if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
-        numBytes = numBytesFinder;
-        break;
-      }// it fits
-    }
-    if (this.nodeType == ColumnNodeType.FAMILY) {
-      blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
-    } else if (this.nodeType == ColumnNodeType.QUALIFIER) {
-      blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
-    } else {
-      blockMeta.setTagsOffsetWidth(parentOffsetWidth);
-    }
-
-    // walk the nodes in output order, handing out offsets front to back
-    int forwardIndex = 0;
-    for (int i = 0; i < allNodes.size(); ++i) {
-      TokenizerNode node = allNodes.get(i);
-      ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
-      int fullNodeWidth = columnNodeWriter
-          .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
-      node.setOutputArrayOffset(forwardIndex);
-      columnNodeWriter.setTokenBytes(node.getToken());
-      if (node.isRoot()) {
-        columnNodeWriter.setParentStartPosition(0);
-      } else {
-        columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
-      }
-      forwardIndex += fullNodeWidth;
-    }
-
-    tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
-  }
-
-  /**
-   * Writes every compiled node, in layout order. Writes nothing if no compilation has happened
-   * since construction or the last {@link #reset()}.
-   * @param os destination stream
-   * @throws IOException if the stream rejects a write
-   */
-  public void writeBytes(OutputStream os) throws IOException {
-    for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
-      columnNodeWriter.writeBytes(os);
-    }
-  }
-
-
-  /************* get/set **************************/
-
-  /** @return the node writers of the latest compilation; empty (never null) before the first */
-  public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
-    return columnNodeWriters;
-  }
-
-  /** @return the section size in bytes computed by the latest compilation */
-  public int getNumBytes() {
-    return numBytes;
-  }
-
-  /**
-   * @param sortedIndex index into the offsets appended by the tokenizer during compilation
-   * @return the output offset stored at that index
-   */
-  public int getOutputArrayOffset(int sortedIndex) {
-    return outputArrayOffsets.get(sortedIndex);
-  }
-
-  public ArrayList<TokenizerNode> getNonLeaves() {
-    return nonLeaves;
-  }
-
-  public ArrayList<TokenizerNode> getLeaves() {
-    return leaves;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java
deleted file mode 100644
index 73398f6..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
-
-import org.apache.yetus.audience.InterfaceAudience;
-
-/**
- * Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each
- * KV.  If(allSameType) during conversion to byte[], then we can store the "onlyType" in blockMeta,
- * therefore not repeating it for each cell and saving 1 byte per cell.
- */
-@InterfaceAudience.Private
-public class CellTypeEncoder {
-
-  /************* fields *********************/
-
-  protected boolean pendingFirstType = true;
-  protected boolean allSameType = true;
-  protected byte onlyType;
-
-
-  /************* construct *********************/
-
-  public void reset() {
-    pendingFirstType = true;
-    allSameType = true;
-  }
-
-
-  /************* methods *************************/
-
-  public void add(byte type) {
-    if (pendingFirstType) {
-      onlyType = type;
-      pendingFirstType = false;
-    } else if (onlyType != type) {
-      allSameType = false;
-    }
-  }
-
-
-  /**************** get/set **************************/
-
-  public boolean areAllSameType() {
-    return allSameType;
-  }
-
-  public byte getOnlyType() {
-    return onlyType;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/ColumnNodeType.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/ColumnNodeType.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/ColumnNodeType.java
deleted file mode 100644
index 090f143..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/ColumnNodeType.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
-
-import org.apache.yetus.audience.InterfaceAudience;
-
-/**
- * Specifies the type of columnnode writer.
- */
-@InterfaceAudience.Private
-public enum ColumnNodeType {
-  FAMILY, QUALIFIER, TAGS;
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java
deleted file mode 100644
index e5d153e..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.HashSet;
-
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ArrayUtils;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hadoop.hbase.util.vint.UFIntTool;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.base.Joiner;
-
-/**
- * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
- * collection of Cells.
- *
- * 1. add longs to a HashSet for fast de-duplication
- * 2. keep track of the min and max
- * 3. copy all values to a new long[]
- * 4. Collections.sort the long[]
- * 5. calculate maxDelta = max - min
- * 6. determine FInt width based on maxDelta
- * 7. PrefixTreeEncoder binary searches to find index of each value
- */
-@InterfaceAudience.Private
-public class LongEncoder {
-
-  /****************** fields ****************************/
-
-  protected HashSet<Long> uniqueValues;
-  protected long[] sortedUniqueValues;
-  protected long min, max, maxDelta;
-
-  protected int bytesPerDelta;
-  protected int bytesPerIndex;
-  protected int totalCompressedBytes;
-
-
-  /****************** construct ****************************/
-
-  public LongEncoder() {
-    this.uniqueValues = new HashSet<>();
-  }
-
-  public void reset() {
-    uniqueValues.clear();
-    sortedUniqueValues = null;
-    min = Long.MAX_VALUE;
-    max = Long.MIN_VALUE;
-    maxDelta = Long.MIN_VALUE;
-    bytesPerIndex = 0;
-    bytesPerDelta = 0;
-    totalCompressedBytes = 0;
-  }
-
-
-  /************* methods ***************************/
-
-  public void add(long timestamp) {
-    uniqueValues.add(timestamp);
-  }
-
-  public LongEncoder compile() {
-    int numUnique = uniqueValues.size();
-    if (numUnique == 1) {
-      min = CollectionUtils.getFirst(uniqueValues);
-      sortedUniqueValues = new long[] { min };
-      return this;
-    }
-
-    sortedUniqueValues = new long[numUnique];
-    int lastIndex = -1;
-    for (long value : uniqueValues) {
-      sortedUniqueValues[++lastIndex] = value;
-    }
-    Arrays.sort(sortedUniqueValues);
-    min = ArrayUtils.getFirst(sortedUniqueValues);
-    max = ArrayUtils.getLast(sortedUniqueValues);
-    maxDelta = max - min;
-    if (maxDelta > 0) {
-      bytesPerDelta = UFIntTool.numBytes(maxDelta);
-    } else {
-      bytesPerDelta = 0;
-    }
-
-    int maxIndex = numUnique - 1;
-    bytesPerIndex = UFIntTool.numBytes(maxIndex);
-
-    totalCompressedBytes = numUnique * bytesPerDelta;
-
-    return this;
-  }
-
-  public long getDelta(int index) {
-    if (sortedUniqueValues.length == 0) {
-      return 0;
-    }
-    return sortedUniqueValues[index] - min;
-  }
-
-  public int getIndex(long value) {
-    // should always find an exact match
-    return Arrays.binarySearch(sortedUniqueValues, value);
-  }
-
-  public void writeBytes(OutputStream os) throws IOException {
-    for (int i = 0; i < sortedUniqueValues.length; ++i) {
-      long delta = sortedUniqueValues[i] - min;
-      UFIntTool.writeBytes(bytesPerDelta, delta, os);
-    }
-  }
-
-  //convenience method for tests
-  public byte[] getByteArray() throws IOException{
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    writeBytes(baos);
-    return baos.toByteArray();
-  }
-
-  public int getOutputArrayLength() {
-    return sortedUniqueValues.length * bytesPerDelta;
-  }
-
-  public int getNumUniqueValues() {
-    return sortedUniqueValues.length;
-  }
-
-
-  /******************* Object methods **********************/
-
-  @Override
-  public String toString() {
-    if (ArrayUtils.isEmpty(sortedUniqueValues)) {
-      return "[]";
-    }
-    return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
-  }
-
-
-  /******************** get/set **************************/
-
-  public long getMin() {
-    return min;
-  }
-
-  public int getBytesPerDelta() {
-    return bytesPerDelta;
-  }
-
-  public int getBytesPerIndex() {
-    return bytesPerIndex;
-  }
-
-  public int getTotalCompressedBytes() {
-    return totalCompressedBytes;
-  }
-
-  public long[] getSortedUniqueTimestamps() {
-    return sortedUniqueValues;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java
deleted file mode 100644
index 6e114e9..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hadoop.hbase.util.ByteRangeUtils;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hadoop.hbase.util.vint.UFIntTool;
-import org.apache.hadoop.hbase.util.vint.UVIntTool;
-
-/**
- * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
- * Please see the write() method for the order in which data is written.
- */
-@InterfaceAudience.Private
-public class RowNodeWriter{
-  protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);
-
-  /********************* fields ******************************/
-
-  protected PrefixTreeEncoder prefixTreeEncoder;
-  protected PrefixTreeBlockMeta blockMeta;
-  protected TokenizerNode tokenizerNode;
-
-  protected int tokenWidth;
-  protected int fanOut;
-  protected int numCells;
-
-  protected int width;
-
-
-  /*********************** construct *************************/
-
-  public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
-    reconstruct(keyValueBuilder, tokenizerNode);
-  }
-
-  public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
-    this.prefixTreeEncoder = prefixTreeEncoder;
-    reset(tokenizerNode);
-  }
-
-  public void reset(TokenizerNode node) {
-    this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks
-    this.tokenizerNode = node;
-    this.tokenWidth = 0;
-    this.fanOut = 0;
-    this.numCells = 0;
-    this.width = 0;
-    calculateOffsetsAndLengths();
-  }
-
-
-  /********************* methods ****************************/
-
-  protected void calculateOffsetsAndLengths(){
-    tokenWidth = tokenizerNode.getTokenLength();
-    if(!tokenizerNode.isRoot()){
-      --tokenWidth;//root has no parent
-    }
-    fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
-    numCells = tokenizerNode.getNumOccurrences();
-  }
-
-  public int calculateWidth(){
-    calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
-    return width;
-  }
-
-  public int calculateWidthOverrideOffsetWidth(int offsetWidth){
-    width = 0;
-    width += UVIntTool.numBytes(tokenWidth);
-    width += tokenWidth;
-
-    width += UVIntTool.numBytes(fanOut);
-    width += fanOut;
-
-    width += UVIntTool.numBytes(numCells);
-
-    if(tokenizerNode.hasOccurrences()){
-      int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
-        + blockMeta.getQualifierOffsetWidth()
-        + blockMeta.getTagsOffsetWidth()
-        + blockMeta.getTimestampIndexWidth()
-        + blockMeta.getMvccVersionIndexWidth()
-        + blockMeta.getKeyValueTypeWidth()
-        + blockMeta.getValueOffsetWidth()
-        + blockMeta.getValueLengthWidth();
-      width += numCells * fixedBytesPerCell;
-    }
-
-    if (!tokenizerNode.isLeaf()) {
-      width += fanOut * offsetWidth;
-    }
-
-    return width;
-  }
-
-
-  /*********************** writing the compiled structure to the OutputStream ***************/
-
-  public void write(OutputStream os) throws IOException{
-    //info about this row trie node
-    writeRowToken(os);
-    writeFan(os);
-    writeNumCells(os);
-
-    //UFInt indexes and offsets for each cell in the row (if nub or leaf)
-    writeFamilyNodeOffsets(os);
-    writeQualifierNodeOffsets(os);
-    writeTagNodeOffsets(os);
-    writeTimestampIndexes(os);
-    writeMvccVersionIndexes(os);
-    writeCellTypes(os);
-    writeValueOffsets(os);
-    writeValueLengths(os);
-    //offsets to the children of this row trie node (if branch or nub)
-    writeNextRowTrieNodeOffsets(os);
-  }
-
-
-  /**
-   * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
-   * fields can reproduce all the row keys that compose the block.
-   */
-
-  /**
-   * UVInt: tokenWidth
-   * bytes: token
-   */
-  protected void writeRowToken(OutputStream os) throws IOException {
-    UVIntTool.writeBytes(tokenWidth, os);
-    int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
-    ByteRangeUtils.write(os, tokenizerNode.getToken(), tokenStartIndex);
-  }
-
-  /**
-   * UVInt: numFanBytes/fanOut
-   * bytes: each fan byte
-   */
-  public void writeFan(OutputStream os) throws IOException {
-    UVIntTool.writeBytes(fanOut, os);
-    if (fanOut <= 0) {
-      return;
-    }
-    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
-    for (int i = 0; i < children.size(); ++i) {
-      TokenizerNode child = children.get(i);
-      os.write(child.getToken().get(0));// first byte of each child's token
-    }
-  }
-
-  /**
-   * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes
-   */
-  protected void writeNumCells(OutputStream os) throws IOException {
-    UVIntTool.writeBytes(numCells, os);
-  }
-
-
-  /**
-   * The following methods write data for each cell in the row, mostly consisting of indexes or
-   * offsets into the timestamp/column data structures that are written in the middle of the block.
-   * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
-   * search of a particular column/timestamp combination.
-   * <p>
-   * Branch nodes will not have any data in these sections.
-   * </p>
-   */
-
-  protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
-    if (blockMeta.getFamilyOffsetWidth() <= 0) {
-      return;
-    }
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
-          .getFirstInsertionIndex() + i : 0;
-      int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
-        cellInsertionIndex);
-      int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
-        sortedIndex);
-      UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
-    }
-  }
-
-  protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
-    if (blockMeta.getQualifierOffsetWidth() <= 0) {
-      return;
-    }
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
-      int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
-        cellInsertionIndex);
-      int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
-        sortedIndex);
-      UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
-    }
-  }
-
-  protected void writeTagNodeOffsets(OutputStream os) throws IOException {
-    if (blockMeta.getTagsOffsetWidth() <= 0) {
-      return;
-    }
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
-      int sortedIndex = prefixTreeEncoder.getTagSorter().getSortedIndexForInsertionId(
-        cellInsertionIndex);
-      int indexedTagOffset = prefixTreeEncoder.getTagWriter().getOutputArrayOffset(
-        sortedIndex);
-      UFIntTool.writeBytes(blockMeta.getTagsOffsetWidth(), indexedTagOffset, os);
-    }
-  }
-
-  protected void writeTimestampIndexes(OutputStream os) throws IOException {
-    if (blockMeta.getTimestampIndexWidth() <= 0) {
-      return;
-    }
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
-      long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
-      int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
-      UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
-    }
-  }
-
-  protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
-    if (blockMeta.getMvccVersionIndexWidth() <= 0) {
-      return;
-    }
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
-      long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
-      int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
-      UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
-    }
-  }
-
-  protected void writeCellTypes(OutputStream os) throws IOException {
-    if (blockMeta.isAllSameType()) {
-      return;
-    }
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
-      os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
-    }
-  }
-
-  protected void writeValueOffsets(OutputStream os) throws IOException {
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
-      long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
-      UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
-    }
-  }
-
-  protected void writeValueLengths(OutputStream os) throws IOException {
-    for (int i = 0; i < numCells; ++i) {
-      int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
-      int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
-      UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
-    }
-  }
-
-  /**
-   * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
-   */
-  protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
-    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
-    for (int i = 0; i < children.size(); ++i) {
-      TokenizerNode child = children.get(i);
-      int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
-      UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java
deleted file mode 100644
index 3d9fa13..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
-import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hadoop.hbase.util.vint.UFIntTool;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-/**
- * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
- * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
- * also contains references to offsets in the other sections of the data block that enable the
- * decoder to match a row key with its qualifier, timestamp, type, value, etc.
- * <p>
- * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
- * internals of each row node.
- */
-@InterfaceAudience.Private
-public class RowSectionWriter {
-
-  /***************** fields **************************/
-
-  protected PrefixTreeEncoder prefixTreeEncoder;
-
-  protected PrefixTreeBlockMeta blockMeta;
-
-  protected int numBytes;
-
-  protected ArrayList<TokenizerNode> nonLeaves;
-  protected ArrayList<TokenizerNode> leaves;
-
-  protected ArrayList<RowNodeWriter> leafWriters;
-  protected ArrayList<RowNodeWriter> nonLeafWriters;
-
-  protected int numLeafWriters;
-  protected int numNonLeafWriters;
-
-
-  /********************* construct **********************/
-
-  public RowSectionWriter() {
-    this.nonLeaves = Lists.newArrayList();
-    this.leaves = Lists.newArrayList();
-    this.leafWriters = Lists.newArrayList();
-    this.nonLeafWriters = Lists.newArrayList();
-  }
-
-  public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
-    reconstruct(prefixTreeEncoder);
-  }
-
-  public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
-    this.prefixTreeEncoder = prefixTreeEncoder;
-    this.blockMeta = prefixTreeEncoder.getBlockMeta();
-    reset();
-  }
-
-  public void reset() {
-    numBytes = 0;
-    nonLeaves.clear();
-    leaves.clear();
-    numLeafWriters = 0;
-    numNonLeafWriters = 0;
-  }
-
-
-  /****************** methods *******************************/
-
-  public RowSectionWriter compile() {
-    blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
-    prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
-
-    prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
-    prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
-
-    // track the starting position of each node in final output
-    int negativeIndex = 0;
-
-    // create leaf writer nodes
-    // leaf widths are known at this point, so add them up
-    int totalLeafBytes = 0;
-    for (int i = leaves.size() - 1; i >= 0; --i) {
-      TokenizerNode leaf = leaves.get(i);
-      RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
-      ++numLeafWriters;
-      // leaves store all but their first token byte
-      int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
-      totalLeafBytes += leafNodeWidth;
-      negativeIndex += leafNodeWidth;
-      leaf.setNegativeIndex(negativeIndex);
-    }
-
-    int totalNonLeafBytesWithoutOffsets = 0;
-    int totalChildPointers = 0;
-    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
-      TokenizerNode nonLeaf = nonLeaves.get(i);
-      RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
-      ++numNonLeafWriters;
-      totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
-      totalChildPointers += nonLeaf.getNumChildren();
-    }
-
-    // figure out how wide our offset FInts are
-    int offsetWidth = 0;
-    while (true) {
-      ++offsetWidth;
-      int offsetBytes = totalChildPointers * offsetWidth;
-      int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
-      if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
-        // it fits
-        numBytes = totalRowBytes;
-        break;
-      }
-    }
-    blockMeta.setNextNodeOffsetWidth(offsetWidth);
-
-    // populate negativeIndexes
-    for (int i = nonLeaves.size() - 1; i >= 0; --i) {
-      TokenizerNode nonLeaf = nonLeaves.get(i);
-      int writerIndex = nonLeaves.size() - i - 1;
-      RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
-      int nodeWidth = nonLeafWriter.calculateWidth();
-      negativeIndex += nodeWidth;
-      nonLeaf.setNegativeIndex(negativeIndex);
-    }
-
-    return this;
-  }
-
-  protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
-      TokenizerNode builderNode) {
-    RowNodeWriter rowNodeWriter = null;
-    //check if there is an existing node we can recycle
-    if (index >= list.size()) {
-      //there are not enough existing nodes, so add a new one which will be retrieved below
-      list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
-    }
-    rowNodeWriter = list.get(index);
-    rowNodeWriter.reset(builderNode);
-    return rowNodeWriter;
-  }
-
-
-  public void writeBytes(OutputStream os) throws IOException {
-    for (int i = numNonLeafWriters - 1; i >= 0; --i) {
-      RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
-      nonLeafWriter.write(os);
-    }
-    // duplicates above... written more for clarity right now
-    for (int i = numLeafWriters - 1; i >= 0; --i) {
-      RowNodeWriter leafWriter = leafWriters.get(i);
-      leafWriter.write(os);
-    }
-  }
-
-
-  /***************** static ******************************/
-
-  protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
-      ArrayList<TokenizerNode> ins, boolean leaves) {
-    ArrayList<TokenizerNode> outs = Lists.newArrayList();
-    for (int i = ins.size() - 1; i >= 0; --i) {
-      TokenizerNode n = ins.get(i);
-      if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) {
-        outs.add(ins.get(i));
-      }
-    }
-    return outs;
-  }
-
-
-  /************* get/set **************************/
-
-  public int getNumBytes() {
-    return numBytes;
-  }
-
-  public ArrayList<TokenizerNode> getNonLeaves() {
-    return nonLeaves;
-  }
-
-  public ArrayList<TokenizerNode> getLeaves() {
-    return leaves;
-  }
-
-  public ArrayList<RowNodeWriter> getNonLeafWriters() {
-    return nonLeafWriters;
-  }
-
-  public ArrayList<RowNodeWriter> getLeafWriters() {
-    return leafWriters;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hbase/blob/f8c58930/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java
----------------------------------------------------------------------
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java
deleted file mode 100644
index 870409c..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ArrayUtils;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-
-import org.apache.hadoop.hbase.shaded.com.google.common.collect.Lists;
-
-/**
- * Data structure used in the first stage of PrefixTree encoding:
- * <ul>
- * <li>accepts a sorted stream of ByteRanges
- * <li>splits them into a set of tokens, each held by a {@link TokenizerNode}
- * <li>connects the TokenizerNodes via standard java references
- * <li>keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content
- * </ul>
- * <p><br>
- * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier
- * encoding.
- */
-@InterfaceAudience.Private
-public class Tokenizer{
-
-  /***************** fields **************************/
-
-  protected int numArraysAdded = 0;
-  protected long lastNodeId = -1;
-  protected ArrayList<TokenizerNode> nodes;
-  protected int numNodes;
-  protected TokenizerNode root;
-  protected byte[] tokens;
-  protected int tokensLength;
-
-  protected int maxElementLength = 0;
-  // number of levels in the tree assuming root level is 0
-  protected int treeDepth = 0;
-
-
-  /******************* construct *******************/
-
-  public Tokenizer() {
-    this.nodes = Lists.newArrayList();
-    this.tokens = new byte[0];
-  }
-
-  public void reset() {
-    numArraysAdded = 0;
-    lastNodeId = -1;
-    numNodes = 0;
-    tokensLength = 0;
-    root = null;
-    maxElementLength = 0;
-    treeDepth = 0;
-  }
-
-
-  /***************** building *************************/
-
-  public void addAll(ArrayList<ByteRange> sortedByteRanges) {
-    for (int i = 0; i < sortedByteRanges.size(); ++i) {
-      ByteRange byteRange = sortedByteRanges.get(i);
-      addSorted(byteRange);
-    }
-  }
-
-  public void addSorted(final ByteRange bytes) {
-    ++numArraysAdded;
-    if (bytes.getLength() > maxElementLength) {
-      maxElementLength = bytes.getLength();
-    }
-    if (root == null) {
-      // nodeDepth of firstNode (non-root) is 1
-      root = addNode(null, 1, 0, bytes, 0);
-    } else {
-      root.addSorted(bytes);
-    }
-  }
-
-  public void incrementNumOccurrencesOfLatestValue(){
-    CollectionUtils.getLast(nodes).incrementNumOccurrences(1);
-  }
-
-  protected long nextNodeId() {
-    return ++lastNodeId;
-  }
-
-  protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset,
-      final ByteRange token, int inputTokenOffset) {
-    int inputTokenLength = token.getLength() - inputTokenOffset;
-    int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset);
-    TokenizerNode node = null;
-    if (nodes.size() <= numNodes) {
-      node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset,
-          inputTokenLength);
-      nodes.add(node);
-    } else {
-      node = nodes.get(numNodes);
-      node.reset();
-      node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength);
-    }
-    ++numNodes;
-    return node;
-  }
-
-  protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) {
-    int newOffset = tokensLength;
-    int inputTokenLength = token.getLength() - inputTokenOffset;
-    int newMinimum = tokensLength + inputTokenLength;
-    tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum);
-    token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength);
-    tokensLength += inputTokenLength;
-    return newOffset;
-  }
-
-  protected void submitMaxNodeDepthCandidate(int nodeDepth) {
-    if (nodeDepth > treeDepth) {
-      treeDepth = nodeDepth;
-    }
-  }
-
-
-  /********************* read ********************/
-
-  public int getNumAdded(){
-    return numArraysAdded;
-  }
-
-  // for debugging
-  public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves) {
-    ArrayList<TokenizerNode> nodes = Lists.newArrayList();
-    root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves);
-    return nodes;
-  }
-
-  public void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves,
-      boolean includeLeaves) {
-    root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
-  }
-
-  public List<byte[]> getArrays() {
-    List<TokenizerNode> nodes = new ArrayList<>();
-    root.appendNodesToExternalList(nodes, true, true);
-    List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes));
-    for (int i = 0; i < nodes.size(); ++i) {
-      TokenizerNode node = nodes.get(i);
-      for (int j = 0; j < node.getNumOccurrences(); ++j) {
-        byte[] byteArray = node.getNewByteArray();
-        byteArrays.add(byteArray);
-      }
-    }
-    return byteArrays;
-  }
-
-  //currently unused, but working and possibly useful in the future
-  public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
-      int keyLength) {
-    root.getNode(resultHolder, key, keyOffset, keyLength);
-  }
-
-
-  /********************** write ***************************/
-
-  public Tokenizer setNodeFirstInsertionIndexes() {
-    root.setInsertionIndexes(0);
-    return this;
-  }
-
-  public Tokenizer appendOutputArrayOffsets(List<Integer> offsets) {
-    root.appendOutputArrayOffsets(offsets);
-    return this;
-  }
-
-
-  /********************* print/debug ********************/
-
-  protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false;
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append(getStructuralString());
-    if (INCLUDE_FULL_TREE_IN_TO_STRING) {
-      for (byte[] bytes : getArrays()) {
-        if (sb.length() > 0) {
-          sb.append("\n");
-        }
-        sb.append(Bytes.toString(bytes));
-      }
-    }
-    return sb.toString();
-  }
-
-  public String getStructuralString() {
-    List<TokenizerNode> nodes = getNodes(true, true);
-    StringBuilder sb = new StringBuilder();
-    for (TokenizerNode node : nodes) {
-      String line = node.getPaddedTokenAndOccurrenceString();
-      sb.append(line + "\n");
-    }
-    return sb.toString();
-  }
-
-
-  /****************** get/set ************************/
-
-  public TokenizerNode getRoot() {
-    return root;
-  }
-
-  public int getMaxElementLength() {
-    return maxElementLength;
-  }
-
-  public int getTreeDepth() {
-    return treeDepth;
-  }
-
-}


Mime
View raw message