hbase-commits mailing list archives

From st...@apache.org
Subject svn commit: r910334 [1/2] - in /hadoop/hbase/branches/0.20: ./ src/contrib/indexed/ src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/ src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/ src/contrib/indexed/src/java/org/a...
Date Mon, 15 Feb 2010 21:00:05 GMT
Author: stack
Date: Mon Feb 15 21:00:03 2010
New Revision: 910334

URL: http://svn.apache.org/viewvc?rev=910334&view=rev
Log:
HBASE-2202-2207, -2227, 2167 A bunch of fixes for IHBase -- or IdxHBase

Modified:
    hadoop/hbase/branches/0.20/CHANGES.txt
    hadoop/hbase/branches/0.20/src/contrib/indexed/build.xml
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/idx/support/sets/BitSet.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/client/idx/TestIdxIndexDescriptor.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestCompleteIndex.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestIdxExpressionEvaluator.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestIdxRegion.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/idx/support/sets/IntSetBaseTestCase.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/idx/support/sets/TestBitSet.java
    hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/idx/support/sets/TestSparseBitSet.java
    hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/HConstants.java
    hadoop/hbase/branches/0.20/src/java/org/apache/hadoop/hbase/regionserver/MemStore.java
    hadoop/hbase/branches/0.20/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java

Modified: hadoop/hbase/branches/0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/CHANGES.txt?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.20/CHANGES.txt Mon Feb 15 21:00:03 2010
@@ -7,6 +7,16 @@
    HBASE-2173  New idx javadoc not included with the rest
    HBASE-2199  hbase.client.tableindexed.IndexSpecification,
                lines 72-73 should be reversed (Adrian Popescu via Stack)
+   HBASE-2202  IdxRegion crash when binary characters
+   HBASE-2203  [IHBase] Include only those columns required for indexed scan
+   HBASE-2204  [IHBASE] Index expression evaluation should fail with a
+               DoNotRetryException in case of an invalid index specification
+   HBASE-2205  [IHBASE] Updated Idx package javadocs
+   HBASE-2206  [IHBASE] Idx memory allocation fix
+   HBASE-2207  [IHBASE] Index partial column values
+   HBASE-2227  [IHBASE] Idx Expression functionality is incompatible with
+               SingleColumnValueFilter
+   HBASE-2167  PE for IHBase
 
   IMPROVEMENTS
    HBASE-2180  Bad read performance from synchronizing hfile.fddatainputstream

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/build.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/build.xml?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/build.xml (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/build.xml Mon Feb 15 21:00:03 2010
@@ -19,19 +19,32 @@
 
 <!-- 
 Before you can run these subtargets directly, you need 
-to call at top-level: ant deploy-contrib compile-core-test
+to call at top-level: ant compile-core-test
 -->
 <project name="indexed" default="jar">
   <import file="../build-contrib.xml"/>
 
-  <property name="lib.dir" value="${basedir}/lib"/> 
+  <property name="lib.dir" value="${basedir}/lib"/>
+
+  <!-- Override ../build-contrib.xml jar -->
+  <target name="jar" depends="compile, compile-test" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <jar
+      jarfile="${build.dir}/hbase-${version}-${name}.jar"
+      basedir="${build.classes}"
+    />
+    <jar jarfile="${build.dir}/hbase-${version}-${name}-test.jar" >
+      <fileset dir="${build.test}" includes="org/**" />
+      <fileset dir="${src.test}" includes="**/*.properties" />
+    </jar>
+  </target>
 
   <!--Override ../build-contrib.xml package-->
   <target name="package" depends="jar" unless="skip.contrib">
     <mkdir dir="${dist.dir}/contrib/${name}"/>
     <copy todir="${dist.dir}/contrib/${name}" includeEmptyDirs="false" flatten="true">
       <fileset dir="${build.dir}">
-        <include name="hbase-${version}-${name}.jar" />
+        <include name="hbase-${version}-${name}*.jar" />
       </fileset>
     </copy>
     <mkdir dir="${dist.dir}/contrib/${name}/lib"/>

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java Mon Feb 15 21:00:03 2010
@@ -20,8 +20,7 @@
 package org.apache.hadoop.hbase.client.idx;
 
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.VersionedWritable;
 import org.apache.hadoop.io.WritableUtils;
 
 import java.io.DataInput;
@@ -31,8 +30,28 @@
 
 /**
  * The description of an indexed column family qualifier.
+ * <p>
+ * The description is composed of the following properties:
+ * <ol>
+ * <li> The qualifier name - specifies which qualifier to index. The values
+ * stored in this qualifier will serve as index keys.
+ * <li> The qualifier type - type information for the qualifier. The type
+ * information allows for custom ordering of index keys (which are qualifier
+ * values), which comes in handy when range queries are executed.
+ * <li> offset - combine this property with the length property to allow partial
+ * value extraction. Useful for keeping the index size small for qualifiers
+ * with large values. The offset specifies the starting point in the value from
+ * which to extract the index key.
+ * <li> length - see also the offset's description; the length property limits
+ * the number of bytes extracted to serve as index keys. If the bytes
+ * are random, a length of 1 or 2 bytes would yield very good results.
+ * </ol>
+ * </p>
  */
-public class IdxIndexDescriptor implements Writable {
+public class IdxIndexDescriptor extends VersionedWritable {
+
+  private static final byte VERSION = 1;
+
   /**
    * Qualifier name;
    */
@@ -45,6 +64,18 @@
   private IdxQualifierType qualifierType;
 
   /**
+   * Where to grab the column qualifier's value from. The default is from
+   * its first byte.
+   */
+  private int offset = 0;
+
+  /**
+   * Up to where to grab the column qualifier's value. The default is
+   * all of it. A positive number would indicate a set limit.
+   */
+  private int length = -1;
+
+  /**
    * Empty constructor to support the writable interface - DO NOT USE.
    */
   public IdxIndexDescriptor() {
@@ -62,6 +93,22 @@
   }
 
   /**
+   * Construct a new index descriptor.
+   *
+   * @param qualifierName the qualifier name
+   * @param qualifierType the qualifier type
+   * @param offset        the offset (from kv value start) from which to extract the
+   *                      index key
+   * @param length        the length to extract (everything by default)
+   */
+  public IdxIndexDescriptor(byte[] qualifierName, IdxQualifierType qualifierType,
+    int offset, int length) {
+    this(qualifierName, qualifierType);
+    this.offset = offset;
+    this.length = length;
+  }
+
+  /**
    * The column family qualifier name.
    * @return column family qualifier name
    */
@@ -94,12 +141,51 @@
   }
 
   /**
+   * The offset from which to extract the values.
+   *
+   * @return the current offset value.
+   */
+  public int getOffset() {
+    return offset;
+  }
+
+  /**
+   * Sets the offset
+   *
+   * @param offset the offset from which to extract the values.
+   */
+  public void setOffset(int offset) {
+    this.offset = offset;
+  }
+
+  /**
+   * The length of the block extracted from the qualifier's value.
+   *
+   * @return the length of the extracted value
+   */
+  public int getLength() {
+    return length;
+  }
+
+  /**
+   * The length of the extracted value.
+   *
+   * @param length the length of the extracted value.
+   */
+  public void setLength(int length) {
+    this.length = length;
+  }
+
+  /**
    * {@inheritDoc}
    */
   @Override
   public void write(DataOutput dataOutput) throws IOException {
+    super.write(dataOutput);
     Bytes.writeByteArray(dataOutput, qualifierName);
     WritableUtils.writeEnum(dataOutput, qualifierType);
+    dataOutput.writeInt(offset);
+    dataOutput.writeInt(length);
   }
 
   /**
@@ -107,8 +193,19 @@
    */
   @Override
   public void readFields(DataInput dataInput) throws IOException {
+    super.readFields(dataInput);
     qualifierName = Bytes.readByteArray(dataInput);
     qualifierType = WritableUtils.readEnum(dataInput, IdxQualifierType.class);
+    this.offset = dataInput.readInt();
+    this.length = dataInput.readInt();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public byte getVersion() {
+    return VERSION;
   }
 
   /**
@@ -123,6 +220,12 @@
 
     if (!Arrays.equals(qualifierName, that.qualifierName)) return false;
 
+    if (this.qualifierType != that.qualifierType) return false;
+
+    if (this.offset != that.offset) return false;
+
+    if (this.length != that.length) return false;
+
     return true;
   }
 

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java Mon Feb 15 21:00:03 2010
@@ -34,6 +34,7 @@
   private byte[] qualifier;
   private Operator operator;
   private byte[] value;
+  private boolean includeMissing = true;
 
   /**
    * No args constructor.
@@ -53,14 +54,46 @@
   }
 
   /**
-   * Full constructor with all required fields.
+   * Convenience constructor that takes strings and converts them to byte[].
+   *
    * @param columnName the column name
    * @param qualifier  the column qualifier
    * @param operator   the operator
    * @param value      the value
+   * @param includeMissing include missing ids
+   */
+  public Comparison(String columnName, String qualifier, Operator operator,
+    byte[] value, boolean includeMissing) {
+    this(Bytes.toBytes(columnName), Bytes.toBytes(qualifier), operator,
+      value, includeMissing);
+  }
+
+  /**
+   * Partial constructor with all required fields.
+   *
+   * @param columnName     the column name
+   * @param qualifier      the column qualifier
+   * @param operator       the operator
+   * @param value          the value
+   */
+  public Comparison(byte[] columnName, byte[] qualifier, Operator operator,
+    byte[] value) {
+    this(columnName, qualifier, operator, value, true);
+  }
+
+  /**
+   * Full constructor with all fields.
+   *
+   * @param columnName     the column name
+   * @param qualifier      the column qualifier
+   * @param operator       the operator
+   * @param value          the value
+   * @param includeMissing should the comparison result include ids which are
+   *                       missing from the index. Same idea as {@link org.apache.hadoop.hbase.filter.SingleColumnValueFilter#filterIfMissing}.
+   *                       Default value is true.
    */
   public Comparison(byte[] columnName, byte[] qualifier, Operator operator,
-                    byte[] value) {
+    byte[] value, boolean includeMissing) {
     assert columnName != null : "The columnName must not be null";
     assert qualifier != null : "The qualifier must not be null";
     assert operator != null : "The operator must not be null";
@@ -70,6 +103,7 @@
     this.qualifier = qualifier;
     this.operator = operator;
     this.value = value;
+    this.includeMissing = includeMissing;
   }
 
   /**
@@ -106,6 +140,15 @@
   }
 
   /**
+   * Gets whether to include missing columns or not.
+   *
+   * @return true to include missing columns.
+   */
+  public boolean getIncludeMissing() {
+    return includeMissing;
+  }
+
+  /**
    * {@inheritDoc}
    */
   @Override
@@ -163,24 +206,28 @@
    */
   public enum Operator {
     /**
-     * The equals function.
+     * The equals operator.
      */
     EQ,
     /**
-     * The greater than function.
+     * The greater than operator.
      */
     GT,
     /**
-     * The greater than or equals function.
+     * The greater than or equals operator.
      */
     GTE,
     /**
-     * The less than function.
+     * The less than operator.
      */
     LT,
     /**
-     * The less than or equals function.
+     * The less than or equals operator.
+     */
+    LTE,
+    /**
+     * The not equals operator.
      */
-    LTE
+    NEQ,
   }
 }

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java Mon Feb 15 21:00:03 2010
@@ -69,13 +69,49 @@
   /**
    * Creates and returns an {@link Comparison}
    * instance.
-   * @param family the column family name
-   * @param qualifier  the qualifier
-   * @param operator   the operator
-   * @param value      the value
+   *
+   * @param family         the column family name
+   * @param qualifier      the qualifier
+   * @param operator       the operator
+   * @param value          the value
+   * @param includeMissing include ids missing from the index.
+   *                       Same idea as {@link org.apache.hadoop.hbase.filter.SingleColumnValueFilter#filterIfMissing}.
+   *                       true by default
+   * @return the instance
+   */
+  public static Comparison comparison(byte[] family, byte[] qualifier, Comparison.Operator operator, byte[] value, boolean includeMissing) {
+    return new Comparison(family, qualifier, operator, value, includeMissing);
+  }
+
+  /**
+   * Creates and returns an {@link Comparison}
+   * instance.
+   *
+   * @param family    the column family name
+   * @param qualifier the qualifier
+   * @param operator  the operator
+   * @param value     the value
    * @return the instance
    */
   public static Comparison comparison(String family, String qualifier, Comparison.Operator operator, byte[] value) {
     return new Comparison(family, qualifier, operator, value);
   }
+
+  /**
+   * Creates and returns an {@link Comparison}
+   * instance.
+   *
+   * @param family         the column family name
+   * @param qualifier      the qualifier
+   * @param operator       the operator
+   * @param value          the value
+   * @param includeMissing include ids missing from the index.
+   *                       Same idea as {@link org.apache.hadoop.hbase.filter.SingleColumnValueFilter#filterIfMissing}.
+   *                       true by default
+   * @return the instance
+   */
+  public static Comparison comparison(String family, String qualifier, Comparison.Operator operator, byte[] value, boolean includeMissing) {
+    return new Comparison(family, qualifier, operator, value, includeMissing);
+  }
+
 }
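
To show what the new includeMissing flag and NEQ operator add, here is a short
sketch (not from the commit) in the style of the package.html examples; the
family/qualifier names and the 42L probe value are invented:

    import org.apache.hadoop.hbase.client.idx.IdxScan;
    import org.apache.hadoop.hbase.client.idx.exp.Comparison;
    import org.apache.hadoop.hbase.client.idx.exp.Expression;
    import org.apache.hadoop.hbase.util.Bytes;

    public class NotEqualsScanSketch {
      // Sketch only: names and values are illustrative.
      public static IdxScan buildScan() {
        IdxScan idxScan = new IdxScan();
        // "family:qual != 42, and drop rows that have no family:qual at all".
        // With the default includeMissing=true, a NEQ comparison would also
        // match rows that are absent from the index.
        idxScan.setExpression(Expression.comparison(
          Bytes.toBytes("family"), Bytes.toBytes("qual"),
          Comparison.Operator.NEQ, Bytes.toBytes(42L),
          false /* includeMissing */));
        // Per the package.html notes below, a matching filter must still be
        // set on the scan before handing it to HTable.getScanner().
        return idxScan;
      }
    }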

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html Mon Feb 15 21:00:03 2010
@@ -21,280 +21,281 @@
 <head />
 <body bgcolor="white">
 <h2>Indexed HBase</h2>
-      <p>
+      <p>
         This page gives the high levels for the indexed hbase contrib.
         It is assumed that the reader has in-depth knowledge of HBase.
-      </p>
+      </p>
 <h2>Table Of Contents</h2>
-        <ol>
-          <li>
-            <a href=#IndexedHBase-IndexedHBase>Indexed HBase</a>
-            <ol>
-              <li>
-                <a href=#IndexedHBase-Purpose>Purpose</a>
-              </li>
-              <li>
-                <a href=#IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F>Why do we think IHbase outperforms ITHBase?</a>
-              </li>
-            </ol>
-          </li>
-          <li>
-            <a href=#IndexedHBase-Usage>Usage</a>
-          </li>
-          <li>
-            <a href=#IndexedHBase-Implementationnotes>Implementation notes</a>
-          </li>
-        </ol>
+        <ol>
+          <li>
+            <a href=#IndexedHBase-IndexedHBase>Indexed HBase</a>
+            <ol>
+              <li>
+                <a href=#IndexedHBase-Purpose>Purpose</a>
+              </li>
+              <li>
+                <a href=#IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F>Why do we think IHBase outperforms ITHBase?</a>
+              </li>
+            </ol>
+          </li>
+          <li>
+            <a href=#IndexedHBase-Usage>Usage</a>
+          </li>
+          <li>
+            <a href=#IndexedHBase-Implementationnotes>Implementation notes</a>
+          </li>
+        </ol>
 
-      <h3>
-        <a name=IndexedHBase-Purpose></a>Purpose
-      </h3>
-      <p>
+      <h3>
+        <a name=IndexedHBase-Purpose></a>Purpose
+      </h3>
+      <p>
         The goal of the indexed HBase contrib is to speed up scans by indexing HBase columns.
         Indexed HBase (IHBase) is different from the indexed tables in transactional HBase (ITHBase):
         while the indexes in ITHBase are, in fact, hbase tables using the indexed column's values
         as row keys, IHBase creates indexes at the region level.
-        The differences are summarized in the table below.
-      </p>
-      <table >
-        <tbody>
-        <tr>
-          <th >
-            Feature
-          </th>
-          <th >
-            ITHBase
-          </th>
-          <th >
-            IHBase
-          </th>
-          <th >
-            Comment
-          </th>
-        </tr>
-        <tr>
-          <td >
-            global ordering
-          </td>
-          <td >
-            yes
-          </td>
-          <td >
-            no
-          </td>
-          <td >
+        The differences are summarized in the table below.
+      </p>
+      <table >
+        <tbody>
+        <tr>
+          <th >
+            Feature
+          </th>
+          <th >
+            ITHBase
+          </th>
+          <th >
+            IHBase
+          </th>
+          <th >
+            Comment
+          </th>
+        </tr>
+        <tr>
+          <td >
+            global ordering
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            no
+          </td>
+          <td >
             IHBase has an index for each region. The flip side of not having global ordering
             is compatibility with the good old HRegion: results are coming back in row
-            order (and not value order as in ITHBase)
-          </td>
-        </tr>
-        <tr>
-          <td >
-            Full table scan?
-          </td>
-          <td >
-            no
-          </td>
-          <td >
-            no
-          </td>
-          <td >
-            THbase does a partial scan on the index table. ITHBase supports specifying start/end rows to limit the number of scanned regions
-          </td>
-        </tr>
-        <tr>
-          <td >
-            Multiple Index Usage<br clear=all>
-          </td>
-          <td >
-            no
-          </td>
-          <td >
-            yes
-          </td>
-          <td >
-            IHBase can take advantage of multiple indexes in the same scan. IHBase IdxScan object accepts an Expression which allows intersection/unison of several indexed column criteria
-          </td>
-        </tr>
-        <tr>
-          <td >
-            Extra disk storage
-          </td>
-          <td >
-            yes
-          </td>
-          <td >
-            no
-          </td>
-          <td >
-            IHBase indexes are created when the region starts/flushes and do not require any extra storage
-          </td>
-        </tr>
-        <tr>
-          <td >
-            Extra RAM
-          </td>
-          <td >
-            yes
-          </td>
-          <td >
-            yes
-          </td>
-          <td >
+            order (and not value order as in ITHBase)
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Full table scan?
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            ITHBase does a partial scan on the index table. IHBase supports specifying start/end rows to limit the number of scanned regions
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Multiple Index Usage<br clear=all>
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            IHBase can take advantage of multiple indexes in the same scan. The IHBase IdxScan object accepts an Expression which allows intersection/union of several indexed column criteria
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Extra disk storage
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            IHBase indexes are created when the region starts/flushes and do not require any extra storage
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Extra RAM
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
             IHBase indexes are in memory and hence increase the memory overhead.
-            THBbase indexes increase the number of regions each region server has to support thus costing memory too
-          </td>
-        </tr>
-        <tr>
-          <td >
-            Parallel scanning support
-          </td>
-          <td >
-            no
-          </td>
-          <td >
-            yes
-          </td>
-          <td >
+            ITHBase indexes increase the number of regions each region server has to support, thus costing memory too
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Parallel scanning support
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
             In ITHBase the index table needs to be consulted and then GETs are issued
             for each matching row. The behavior of IHBase (as perceived by the client)
             is no different than a regular scan and hence supports parallel
             scanning seamlessly. <font color=darkgray>parallel GET can be implemented
-            to speedup THbase scans</font>
-          </td>
-        </tr>
-        </tbody>
-      </table>
-      <h3>
-        <a name=IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F></a>Why do we think IHbase outperforms THBase?
-      </h3>
-      <ol>
-        <li>
-          More flexible:
-          <ol>
-            <li>
-              Supports range queries and multi-index queries
-            </li>
-            <li>
-              Supports different types - not only byte arrays
-            </li>
-          </ol>
-        </li>
-        <li>
-          Less overhead: THBase pays at least two 'table roundtrips' - one for the index table and the other for the main table
-        </li>
-        <li>
-          Quicker index expression evaluation: IHBase is using dedicated index data structures while ITHBase is using the regular HRegion scan facilities
-        </li>
-      </ol>
-      <h2>
-        <a name=IndexedHBase-Usage></a>Usage
-      </h2>
-      <p>
-        To use Indexed HBase do the following:
-      </p>
-      <ol>
-        <li>
-          Set the hbase.region.impl property to IdxRegion
-          <div class=panelMacro>
-            <table >
-              <tbody>
-              <tr>
-                <td valign=top>
-                  <img align=absmiddle alt="" border=0 height=16 width=16>
-                </td>
-                <td>
-                  <b>IdxRegion HBase configuration snippet</b><br>
-                  <div class="code panel" style=BORDER-WIDTH:1px>
-                    <div class="codeContent panelContent">
+            to speed up ITHBase scans</font>
+          </td>
+        </tr>
+        </tbody>
+      </table>
+      <h3>
+        <a name=IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F></a>Why do we think IHBase outperforms ITHBase?
+      </h3>
+      <ol>
+        <li>
+          More flexible:
+          <ol>
+            <li>
+              Supports range queries and multi-index queries
+            </li>
+            <li>
+              Supports different types - not only byte arrays
+            </li>
+          </ol>
+        </li>
+        <li>
+          Less overhead: ITHBase pays at least two 'table roundtrips' - one for the index table and the other for the main table
+        </li>
+        <li>
+          Quicker index expression evaluation: IHBase is using dedicated index data structures while ITHBase is using the regular HRegion scan facilities
+        </li>
+      </ol>
+      <h2>
+        <a name=IndexedHBase-Usage></a>Usage
+      </h2>
+      <p><font color=red><b>WARNING: The "indexed" contrib and "transactional" contrib are not compatible with each other!</b></font></p>
+      <p>
+        To use Indexed HBase do the following:
+      </p>
+      <ol>
+        <li>
+          Set the hbase.region.impl property to IdxRegion
+          <div class=panelMacro>
+            <table >
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>IdxRegion HBase configuration snippet</b><br>
+                  <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
                     <pre class=code-java>&lt;property&gt;
   &lt;name&gt;hbase.hregion.impl&lt;/name&gt;
   &lt;value&gt;org.apache.hadoop.hbase.regionserver.IdxRegion&lt;/value&gt;
-&lt;/property&gt;</pre>
-                    </div>
-                  </div>
-                </td>
-              </tr>
-              </tbody>
-            </table>
-          </div>
-        </li>
-        <li>
+&lt;/property&gt;</pre>
+                    </div>
+                  </div>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+        <li>
           When creating a table define which columns to index using IdxColumnDescriptor.
           The supported types are all the <a href="http://java.sun.com/docs/books/tutorial/java/nutsandbolts/datatypes.html"> java primitive data types</a>
-          except boolean, byte[], char[] and BigDecimal
-          <div class=panelMacro>
-            <table class="infoMacro zeroBorder">
-              <tbody>
-              <tr>
-                <td valign=top>
-                  <img align=absmiddle alt="" border=0 height=16 width=16>
-                </td>
-                <td>
-                  <b>Creating an HTable with an index on family:qual column</b><br>
-                  <p>
-                    Note that this snippet assumes that all the values assigned to family:qual are exactly 8 bytes, preferrably created using Bytes.toBytes(long). The table may have rows in which family:qual is missing, those rows will not be included in the index.
-                  </p>
-                  <div class="code panel" style=BORDER-WIDTH:1px>
-                    <div class="codeContent panelContent">
+          as well as byte[], char[] and BigDecimal
+          <div class=panelMacro>
+            <table class="infoMacro zeroBorder">
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>Creating an HTable with an index on family:qual column</b><br>
+                  <p>
+                    Note that this snippet assumes that all the values assigned to family:qual are exactly 8 bytes, preferably created using Bytes.toBytes(long). The table may have rows in which family:qual is missing; those rows will not be included in the index.
+                  </p>
+                  <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
                     <pre class=code-java><span class=code-object>byte</span>[] tableName = Bytes.toBytes(<span class=code-quote>"table"</span>);
 <span class=code-object>byte</span>[] familyName = Bytes.toBytes(<span class=code-quote>"family"</span>);
 <span class=code-object>byte</span>[] qualifier = Bytes.toBytes(<span class=code-quote>"qual"</span>);
 
-IdxColumnDescriptor idxColumnDescriptor = <span class=code-keyword>new</span> IdxColumnDescriptor(familyPairName);
+IdxColumnDescriptor idxColumnDescriptor = <span class=code-keyword>new</span> IdxColumnDescriptor(familyName);
 IdxIndexDescriptor indexDescriptor  = <span class=code-keyword>new</span> IdxIndexDescriptor(qualifier, IdxQualifierType.LONG);
 idxColumnDescriptor.addIndexDescriptor(indexDescriptor);
 HTableDescriptor htd = <span class=code-keyword>new</span> HTableDescriptor(tableName);
 htd.addFamily(idxColumnDescriptor);
     
 HBaseConfiguration conf = <span class=code-keyword>new</span> HBaseConfiguration();
-conf.setClass(HConstants.REGION_IMPL, IdxRegion.class, IdxRegion.class);
 HBaseAdmin admin = <span class=code-keyword>new</span> HBaseAdmin(conf);
 admin.createTable(htd);
 HTable table = <span class=code-keyword>new</span> HTable(conf, desc.getName());
-     . . .</pre>
-                    </div>
-                  </div>
-                </td>
-              </tr>
-              </tbody>
-            </table>
-          </div>
-        </li>
-        <li>
-          When scanning make sure you instantiate an IdxScan and that you set the Expression property
-          <div class=panelMacro>
-            <table class="infoMacro zeroBorder">
-              <tbody>
-              <tr>
-                <td valign=top>
-                  <img align=absmiddle alt="" border=0 height=16 width=16>
-                </td>
-                <td>
-                  <b>Indexed scans</b><br>
-                  <p>
-                    Notes:
-                  </p>
-                  <ul>
-                    <li>
-                    <font color=brown><b>Setting an expression doesn't exclude setting a mathcing filter. This duplication is absolutely essential for getting correct scan results</b> </font>
-                    </li>
-                    <li>
-                    The index expression must accept any row accepted by the filter
-                    </li>
-                    <li>
-                    The filter may accept a subset of the rows accepted by the index expression (e.g. narrow down the results set)
-                    </li>
-                    <li>
-                    Setting a filter without setting an expression is supported and would revert to a 'good old scan'
-                    </li>
-                    <li>
-                    The supported expression types are comparison, and, or. Comparisons support GT, GTE, EQ, LTE, LT
-                    </li>
-                    <li>
-                    The caller may combine any number of index expressions using any of the existing indexes. Trying to add an expression for a non-indexed column would result in a runtime error
-                    <div class="code panel" style=BORDER-WIDTH:1px>
-                    <div class="codeContent panelContent">
+. . .</pre>
+                    </div>
+                  </div>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+        <li>
+          When scanning, make sure you create an instance of IdxScan and set the Expression property
+          <div class=panelMacro>
+            <table class="infoMacro zeroBorder">
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>Indexed scans</b><br>
+                  <p>
+                    Notes:
+                  </p>
+                  <ul>
+                    <li>
+                    <font color=brown><b>When creating an IdxScan instance you must provide both an expression and a filter. The filter
is essential for correct scan results.</b> </font>
+                    </li>
+                    <li>
+                    The index expression must accept any row accepted by the filter
+                    </li>
+                    <li>
+                    The filter may accept a subset of the rows accepted by the index expression (e.g. narrow down the results set)
+                    </li>
+                    <li>
+                    Setting a filter without setting an expression is supported and would revert to a 'good old scan'
+                    </li>
+                    <li>
+                    The supported expression types are comparison, and, or. Comparisons support GT, GTE, EQ, LTE, LT
+                    </li>
+                    <li>
+                    The caller may combine any number of index expressions using any of the existing indexes. Trying to add an expression for a non-indexed column would result in a runtime error
+                    <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
                     <pre class=code-java>. . .
 IdxScan idxScan = <span class=code-keyword>new</span> IdxScan();
 idxScan.setExpression(Expression.comparison(familyName, qualifier, Comparison.Operator.EQ, Bytes.toBytes(42L));
@@ -304,36 +305,36 @@
 ResultScanner scanner = table.getScanner(idxScan);
 <span class=code-keyword>for</span> (Result res : scanner) {
    <span class=code-comment>// Do stuff with res
-</span>}</pre>
-                    </div>
-                    </div>
-                    </li>
-                  </ul>
-                </td>
-              </tr>
-              </tbody>
-            </table>
-          </div>
-        </li>
-      </ol>
-      <h2>
-        <a name=IndexedHBase-Implementationnotes></a>Implementation notes
-      </h2>
-      <ul>
-        <li>
-          We only index Store files. Every index scan performs a full memstore scan. Indexing the memstore will be implemented only if scanning the memstore will prove to be a performance bottleneck
-        </li>
-        <li>
-          Index expression evaluation is performed using bitsets. There are two types of bitsets: compressed and expanded. An index will typically store a compressed bitset while an expression evaluator will most probably use an expanded bitset
-        </li>
-        <li>
-          TODO
-        </li>
-      </ul>
-    </div>
-  </div>
-</div>
-<div id=footer>
-</div>
+</span>}</pre>
+                    </div>
+                    </div>
+                    </li>
+                  </ul>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+      </ol>
+      <h2>
+        <a name=IndexedHBase-Implementationnotes></a>Implementation notes
+      </h2>
+      <ul>
+        <li>
+          We only index Store files. Every index scan performs a full memstore scan. Indexing the memstore will be implemented only if scanning the memstore will prove to be a performance bottleneck
+        </li>
+        <li>
+          Index expression evaluation is performed using bitsets. There are two types of bitsets: compressed and expanded. An index will typically store a compressed bitset while an expression evaluator will most probably use an expanded bitset
+        </li>
+        <li>
+          TODO
+        </li>
+      </ul>
+    </div>
+  </div>
+</div>
+<div id=footer>
+</div>
 <br></body>
 </html>
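
The notes above require the index expression to be paired with a filter that
accepts at least the same rows. A hedged sketch (not part of the commit) of
that pairing using SingleColumnValueFilter, the filter that HBASE-2227 in this
commit makes the expression machinery compatible with; the names and the 42L
value are illustrative:

    import org.apache.hadoop.hbase.client.idx.IdxScan;
    import org.apache.hadoop.hbase.client.idx.exp.Comparison;
    import org.apache.hadoop.hbase.client.idx.exp.Expression;
    import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
    import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
    import org.apache.hadoop.hbase.util.Bytes;

    public class ExpressionPlusFilterSketch {
      // Sketch only: names and values are illustrative.
      public static IdxScan buildScan() {
        byte[] family = Bytes.toBytes("family");
        byte[] qualifier = Bytes.toBytes("qual");
        byte[] value = Bytes.toBytes(42L);

        IdxScan idxScan = new IdxScan();
        // The expression narrows the candidate rows using the in-memory index...
        idxScan.setExpression(Expression.comparison(
          family, qualifier, Comparison.Operator.EQ, value));
        // ...and the filter re-checks every candidate row, so the two must agree.
        idxScan.setFilter(new SingleColumnValueFilter(
          family, qualifier, CompareOp.EQUAL, value));
        return idxScan;
      }
    }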

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java Mon Feb 15 21:00:03 2010
@@ -36,39 +36,43 @@
    * The fixed part in the heap size calcualtion.
    */
   static final long FIXED_SIZE = ClassSize.align(ClassSize.OBJECT +
-    ClassSize.REFERENCE + 3 * (ClassSize.ARRAY + ClassSize.REFERENCE) +
+    2 * ClassSize.REFERENCE + 3 * (ClassSize.ARRAY + ClassSize.REFERENCE) +
     Bytes.SIZEOF_LONG + 2 * Bytes.SIZEOF_INT
   );
 
   /**
    * The capacity of the sets.
    */
-  private int numKeyValues;
+  private final int numKeyValues;
   /**
    * The key store - holds the col:qual values.
    */
-  private List<?> keyStore;
+  private final List<?> keyStore;
   /**
-   * The value store - holds sets with {@link numKeyValues} capacity.
+   * The value store - holds sets with {@link #numKeyValues} capacity.
    */
-  private IntSet[] valueStore;
+  private final IntSet[] valueStore;
   /**
    * Sets containing partial calculations of the tail operation.
    */
-  private IntSet[] heads;
+  private final IntSet[] heads;
   /**
    * Sets containing partial calculations of the head operation.
    */
-  private IntSet[] tails;
+  private final IntSet[] tails;
+  /**
+   * A set containing all ids matching any key in this index.
+   */
+  private final IntSet allIds;
   /**
    * The partial calculation interval (used to determine up to which point
    * to use the valueStore before grabbing a pre-calculated set.
    */
-  private int precalcInterval;
+  private final int precalcInterval;
   /**
    * The heap size.
    */
-  private long heapSize;
+  private final long heapSize;
 
   /**
    * Construct a new complete index.
@@ -77,20 +81,22 @@
    * @param valueStore      the value store
    * @param heads           a list of precalculated heads
    * @param tails           a list of precalculated tails
+   * @param allIds         a set containing all ids matching any key in this index
    * @param numKeyValues    the total number of KeyValues for this region
    * @param precalcInterval the interval by which tails/heads are precalculated
    */
   CompleteIndex(List<?> keyStore, IntSet[] valueStore,
-    IntSet[] heads, IntSet[] tails,
+    IntSet[] heads, IntSet[] tails, IntSet allIds,
     int numKeyValues, int precalcInterval) {
     this.keyStore = keyStore;
     this.valueStore = valueStore;
     this.heads = heads;
     this.tails = tails;
+    this.allIds = allIds;
     this.numKeyValues = numKeyValues;
     this.precalcInterval = precalcInterval;
     heapSize = FIXED_SIZE + keyStore.heapSize() + calcHeapSize(valueStore) +
-      calcHeapSize(heads) + calcHeapSize(tails);
+      calcHeapSize(heads) + calcHeapSize(tails) + allIds.heapSize();
   }
 
   /**
@@ -161,6 +167,24 @@
     return result;
   }
 
+  /**
+   * Finds all the results which match any key in this index.
+   *
+   * @return all the ids in this index.
+   */
+  @Override
+  public IntSet all() {
+    return allIds.clone();
+  }
+
+  @Override
+  public int size() {
+    return this.keyStore.size();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
   @Override
   public long heapSize() {
     return heapSize;

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java Mon Feb 15 21:00:03 2010
@@ -22,9 +22,8 @@
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.idx.IdxIndexDescriptor;
-import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BinarySearch;
-import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ObjectArrayList;
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BigDecimalArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BinarySearch;
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ByteArrayArrayList;
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ByteArrayList;
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.CharArrayArrayList;
@@ -34,6 +33,7 @@
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.IntegerArrayList;
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.List;
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.LongArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ObjectArrayList;
 import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ShortArrayList;
 import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
 import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
@@ -44,8 +44,17 @@
  */
 public class CompleteIndexBuilder {
 
-  private HColumnDescriptor columnDescriptor;
-  private IdxIndexDescriptor indexDescriptor;
+  private final HColumnDescriptor columnDescriptor;
+  private final IdxIndexDescriptor indexDescriptor;
+  /**
+   * Offset extracted from the index descriptor.
+   */
+  private final int offset;
+
+  /**
+   * Length extracted from the index descriptor.
+   */
+  private final int length;
 
   /**
    * The target keystore.
@@ -56,6 +65,7 @@
    */
   private ObjectArrayList<IntSetBuilder> valueStoreBuilders;
 
+
   /**
    * Construct a new complete index builder.
    *
@@ -64,46 +74,59 @@
    */
   public CompleteIndexBuilder(HColumnDescriptor columnDescriptor,
     IdxIndexDescriptor indexDescriptor) {
+    this(columnDescriptor, indexDescriptor, 1);
+  }
+
+  /**
+   * Construct a new complete index builder.
+   *
+   * @param columnDescriptor the column descriptor
+   * @param indexDescriptor  the index descriptor
+   * @param initialSize the initial arrays size, use -1 for defaults
+   */
+  public CompleteIndexBuilder(HColumnDescriptor columnDescriptor,
+    IdxIndexDescriptor indexDescriptor, int initialSize) {
     this.columnDescriptor = columnDescriptor;
     this.indexDescriptor = indexDescriptor;
+    this.offset = indexDescriptor.getOffset();
+    this.length = indexDescriptor.getLength();
 
     switch (this.indexDescriptor.getQualifierType()) {
       case BYTE_ARRAY:
-        keyStore = new ByteArrayArrayList();
+        keyStore = new ByteArrayArrayList(initialSize);
         break;
       case LONG:
-        keyStore = new LongArrayList();
+        keyStore = new LongArrayList(initialSize);
         break;
       case DOUBLE:
-        keyStore = new DoubleArrayList();
+        keyStore = new DoubleArrayList(initialSize);
         break;
       case BYTE:
-        keyStore = new ByteArrayList();
+        keyStore = new ByteArrayList(initialSize);
         break;
       case CHAR:
-        keyStore = new CharArrayList();
+        keyStore = new CharArrayList(initialSize);
         break;
       case SHORT:
-        keyStore = new ShortArrayList();
+        keyStore = new ShortArrayList(initialSize);
         break;
       case INT:
-        keyStore = new IntegerArrayList();
+        keyStore = new IntegerArrayList(initialSize);
         break;
       case FLOAT:
-        keyStore = new FloatArrayList();
+        keyStore = new FloatArrayList(initialSize);
         break;
       case BIG_DECIMAL:
-        keyStore = new BigDecimalArrayList();
+        keyStore = new BigDecimalArrayList(initialSize);
         break;
       case CHAR_ARRAY:
-        keyStore = new CharArrayArrayList();
+        keyStore = new CharArrayArrayList(initialSize);
         break;
       default:
         throw new IllegalStateException("Unsupported type " +
           this.indexDescriptor.getQualifierType());
     }
-    valueStoreBuilders = new ObjectArrayList<IntSetBuilder>();
-
+    valueStoreBuilders = new ObjectArrayList<IntSetBuilder>(initialSize);
   }
 
   /**
@@ -115,7 +138,7 @@
   public void addKeyValue(KeyValue kv, int id) {
     assert Bytes.equals(indexDescriptor.getQualifierName(), kv.getQualifier())
       && Bytes.equals(columnDescriptor.getName(), kv.getFamily());
-    byte[] key = kv.getValue();
+    byte[] key = extractKey(kv);
     int index = BinarySearch.search(keyStore, keyStore.size(), key);
     IntSetBuilder intsetBuilder;
     if (index < 0) {
@@ -130,6 +153,25 @@
   }
 
   /**
+   * Extract the key from the KeyValue value.
+   *
+   * @param kv the key value from which to extract the key
+   * @return the extracted key.
+   */
+  private byte[] extractKey(KeyValue kv) {
+    int valueLength = kv.getValueLength();
+    int l = length == -1 ? valueLength - offset : length;
+    if (offset + l > valueLength) {
+      throw new ArrayIndexOutOfBoundsException(String.format("Can't extract key: " +
+        "Offset (%d) + Length (%d) > valueLength (%d)", offset, l, valueLength));
+    }
+    int o = kv.getValueOffset() + this.offset;
+    byte[] result = new byte[l];
+    System.arraycopy(kv.getBuffer(), o, result, 0, l);
+    return result;
+  }
+
+  /**
    * Finalized the index creation and creates the new index.
    *
    * @param numKeyValues the total number of keyvalues in the region
@@ -161,15 +203,28 @@
 
       IntSet[] heads = new IntSet[precalcSize];
       IntSet currentHead = IntSetBuilder.newEmptyIntSet(numKeyValues);
+      int maxHeadIndex = -1;
       for (int i = 0; i < indexSize; i++) {
         currentHead = currentHead.unite(valueStore[i]);
         if (i % interval == 0) {
+          maxHeadIndex = i;
           heads[i / interval] = currentHead;
           currentHead = currentHead.clone();
         }
       }
+      
+      IntSet allIds;
+      if (maxHeadIndex < 0) {
+        allIds = IntSetBuilder.newEmptyIntSet(numKeyValues);
+      } else {
+        allIds = currentHead.clone();
+        // Add all remaining key values to the allIds set
+        for (int i = maxHeadIndex; i < indexSize; i++) {
+          allIds = allIds.unite(valueStore[i]);
+        }
+      }
 
-      return new CompleteIndex(keyStore, valueStore, heads, tails,
+      return new CompleteIndex(keyStore, valueStore, heads, tails, allIds,
         numKeyValues, interval);
     } else {
       return new EmptyIndex(keyStore, numKeyValues);
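
The extractKey() method added above applies the descriptor's offset and length
to the KeyValue's value buffer. The same arithmetic over a plain byte[], as a
standalone sketch (not part of the commit), where length == -1 means
"everything from the offset to the end of the value":

    public class ExtractKeySketch {
      // Sketch only: mirrors the offset/length convention described above.
      static byte[] extractKey(byte[] value, int offset, int length) {
        // length == -1: take the rest of the value starting at offset.
        int l = length == -1 ? value.length - offset : length;
        if (offset + l > value.length) {
          throw new ArrayIndexOutOfBoundsException(
            "offset " + offset + " + length " + l + " > " + value.length);
        }
        byte[] key = new byte[l];
        System.arraycopy(value, offset, key, 0, l);
        return key;
      }

      public static void main(String[] args) {
        byte[] value = new byte[16];                          // a 16-byte stored value
        System.out.println(extractKey(value, 4, -1).length);  // 12: tail of the value
        System.out.println(extractKey(value, 0, 2).length);   // 2: leading two bytes
      }
    }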

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java Mon Feb 15 21:00:03 2010
@@ -79,11 +79,32 @@
     return IntSetBuilder.newEmptyIntSet(numKeyValues);
   }
 
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Returns an empty set.
+   */
+  @Override
+  public IntSet all() {
+    return IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
   @Override
   public String probeToString(byte[] bytes) {
     return ArrayUtils.toString(keyStore.fromBytes(bytes));
   }
 
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int size() {
+    return 0;
+  }
+
   @Override
   public long heapSize() {
     return HEAP_SIZE;

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java Mon Feb 15 21:00:03 2010
@@ -98,10 +98,18 @@
                     Bytes.toString(comparison.getQualifier())));
 
     IntSet matched = null;
+    boolean resultIncludesMissing = false;
     switch (comparison.getOperator()) {
       case EQ:
         matched = index.lookup(comparison.getValue());
         break;
+      case NEQ:
+        matched = index.lookup(comparison.getValue());
+        matched = matched.complement();
+        // When we complement the matched set we may include ids which are
+        // missing from the index
+        resultIncludesMissing = true;
+        break;
       case GT:
         matched = index.tail(comparison.getValue(), false);
         break;
@@ -116,13 +124,19 @@
         break;
     }
 
+    if (comparison.getIncludeMissing() != resultIncludesMissing) {
+      matched = resultIncludesMissing ? matched.intersect(index.all()) : matched.unite(index.all().complement());
+    }
+
     if (LOG.isDebugEnabled() && matched != null) {
       LOG.debug(String.format("Evaluation of comparison on column: '%s', " +
-          "qualifier: '%s', operator: %s, value: '%s' yielded %s matches",
-          Bytes.toString(comparison.getColumnName()),
-          Bytes.toString(comparison.getQualifier()), 
-          comparison.getOperator(),
-          index.probeToString(comparison.getValue()), matched.size()));
+        "qualifier: '%s', operator: %s, value: '%s' include missing: '%b' " +
+        "yielded %s matches",
+        Bytes.toString(comparison.getColumnName()),
+        Bytes.toString(comparison.getQualifier()),
+        comparison.getOperator(),
+        index.probeToString(comparison.getValue()),
+        comparison.getIncludeMissing(), matched.size()));
     }
 
     return matched != null ? matched : null;
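
The includeMissing handling above is plain set algebra over the index's id
sets. A self-contained sketch (not from the commit) of the same corrections,
with java.util.BitSet standing in for the contrib's IntSet and invented ids:

    import java.util.BitSet;

    public class IncludeMissingSketch {
      public static void main(String[] args) {
        int numKeyValues = 10;

        BitSet allIds = new BitSet(numKeyValues);   // rows that carry family:qual
        allIds.set(0, 6);                           // rows 0..5 are in the index

        BitSet eq42 = new BitSet(numKeyValues);     // rows whose value equals 42
        eq42.set(2);
        eq42.set(4);

        // NEQ: complement of the EQ lookup. This also pulls in rows 6..9,
        // which are missing from the index (resultIncludesMissing == true).
        BitSet neq = (BitSet) eq42.clone();
        neq.flip(0, numKeyValues);

        // includeMissing == false: intersect with index.all() to drop rows
        // that never had the qualifier.
        BitSet neqWithoutMissing = (BitSet) neq.clone();
        neqWithoutMissing.and(allIds);              // {0, 1, 3, 5}

        // includeMissing == true on an EQ result (which excludes missing rows
        // by construction): unite with the complement of index.all().
        BitSet missing = (BitSet) allIds.clone();
        missing.flip(0, numKeyValues);              // {6, 7, 8, 9}
        BitSet eqWithMissing = (BitSet) eq42.clone();
        eqWithMissing.or(missing);                  // {2, 4, 6, 7, 8, 9}

        System.out.println("NEQ, includeMissing=true : " + neq);
        System.out.println("NEQ, includeMissing=false: " + neqWithoutMissing);
        System.out.println("EQ,  includeMissing=true : " + eqWithMissing);
      }
    }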

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java Mon Feb 15 21:00:03 2010
@@ -55,9 +55,24 @@
   IntSet head(byte[] probe, boolean inclusive);
 
   /**
+   * Finds all the results which match any key in this index.
+   *
+   * @return all the ids in this index.
+   */
+  IntSet all();
+
+  /**
    * Returns a string representation of the provided bytes probe.
+   *
    * @param bytes the bytes
    * @return the string representation
    */
   String probeToString(byte[] bytes);
+
+  /**
+   * The number of entries in the index.
+   *
+   * @return the number of entries in the index
+   */
+  int size();
 }

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java Mon Feb 15 21:00:03 2010
@@ -27,6 +27,7 @@
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.JmxHelper;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.idx.IdxScan;
 import org.apache.hadoop.hbase.client.idx.exp.Expression;
@@ -154,8 +155,14 @@
       // Grab a new search context
       IdxSearchContext searchContext = indexManager.newSearchContext();
       // use the expression evaluator to determine the final set of ints
-      IntSet matchedExpression = expressionEvaluator.evaluate(searchContext,
-        expression);
+      IntSet matchedExpression = null;
+      try {
+        matchedExpression = expressionEvaluator.evaluate(
+          searchContext, expression
+        );
+      } catch (RuntimeException e) {
+        throw new DoNotRetryIOException(e.getMessage(), e);
+      }
       if (LOG.isDebugEnabled()) {
         LOG.debug(String.format("%s rows matched the index expression",
           matchedExpression.size()));
@@ -165,6 +172,19 @@
   }
 
   /**
+   * Calculates the average number of key/values in this region's memstores.
+   *
+   * @return the average number of key values
+   */
+  int averageNumberOfMemStoreSKeys() {
+    int totalKVs = 0;
+    for (Store store : stores.values()) {
+      totalKVs += store.memstore.numKeyValues();
+    }
+    return totalKVs / this.stores.size();
+  }
+
+  /**
    * A monitoring operation which exposes the number of indexed keys.
    *
    * @return the number of indexed keys.
@@ -368,7 +388,7 @@
       for (byte[] family : regionInfo.getTableDesc().getFamiliesKeys()) {
         Store store = stores.get(family);
         scanners.addAll(getMemstoreScanners(store, scan.getStartRow()));
-        break;  // we only need one
+        //break;  // we only need one
       }
       return new KeyValueHeap(scanners.toArray(new KeyValueScanner[scanners.size()]), comparator);
     }
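
The IdxRegion change above wraps expression evaluation so that a malformed client expression is reported as an error the HBase client will not retry. Below is a rough sketch of that pattern, with a stand-in exception type in place of org.apache.hadoop.hbase.DoNotRetryIOException and a dummy evaluator:

import java.io.IOException;

public class NonRetriableWrapSketch {

  /** Stand-in for org.apache.hadoop.hbase.DoNotRetryIOException. */
  static class NonRetriableIOException extends IOException {
    NonRetriableIOException(String message, Throwable cause) {
      super(message);
      initCause(cause);
    }
  }

  /** Dummy evaluator that rejects bad input the way a real parser might. */
  static int evaluate(String expression) {
    if (expression == null || expression.isEmpty()) {
      throw new IllegalArgumentException("empty expression");
    }
    return expression.length();
  }

  /** Converts evaluation failures into an error the caller should not retry. */
  static int evaluateOrFail(String expression) throws IOException {
    try {
      return evaluate(expression);
    } catch (RuntimeException e) {
      // retrying cannot fix a malformed expression, so fail fast
      throw new NonRetriableIOException(e.getMessage(), e);
    }
  }

  public static void main(String[] args) throws IOException {
    System.out.println(evaluateOrFail("col = 42"));   // prints 8
  }
}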

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java Mon Feb 15 21:00:03 2010
@@ -42,6 +42,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
@@ -57,10 +58,12 @@
       Bytes.SIZEOF_LONG + ClassSize.REENTRANT_LOCK);
 
 
+  private static final int DEFAULT_INITIAL_INDEX_SIZE = 1000;
+
   /**
    * The wrapping region.
    */
-  private IdxRegion region;
+  private final IdxRegion region;
   /**
    * The index map. Each pair holds the column and qualifier.
    */
@@ -75,7 +78,9 @@
    */
   private long heapSize;
 
-  private ReadWriteLock indexSwitchLock;
+  private final ReadWriteLock indexSwitchLock;
+  private static final double INDEX_SIZE_GROWTH_FACTOR = 1.1;
+  private static final double BYTES_IN_MB = 1024D * 1024D;
 
   /**
    * Create and initialize a new index manager.
@@ -98,20 +103,17 @@
   public long rebuildIndexes() throws IOException {
     long startMillis = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info(String.format("Initializing index manager for region: %s",
-        region.toString()));
+      LOG.info(String.format("Initializing index manager for region: %s", region.toString()));
     }
     heapSize = FIXED_SIZE;
-    Map<Pair<byte[], byte[]>, CompleteIndexBuilder>
-      builderTable = initIndexTable();
+    Map<Pair<byte[], byte[]>, CompleteIndexBuilder> builderTable = initIndexTable();
     // if the region is closing/closed then a fillIndex method will throw a
     // NotServingRegion exception when an attempt to obtain a scanner is made
     // NOTE: when the region is being created isClosing() returns true
     if (!(region.isClosing() || region.isClosed()) && !builderTable.isEmpty()) {
       try {
         ObjectArrayList<KeyValue> newKeys = fillIndex(builderTable);
-        Map<Pair<byte[], byte[]>, IdxIndex> newIndexMap =
-          finalizeIndex(builderTable, newKeys);
+        Map<Pair<byte[], byte[]>, IdxIndex> newIndexMap = finalizeIndex(builderTable, newKeys);
         switchIndex(newKeys, newIndexMap);
       } catch (NotServingRegionException e) {
         // the not serving exception may also be thrown during the scan if
@@ -119,8 +121,7 @@
         LOG.warn("Aborted index initialization", e);
       }
     } else {
-    switchIndex(new ObjectArrayList<KeyValue>(),
-      Collections.<Pair<byte[], byte[]>, IdxIndex>emptyMap());
+      switchIndex(new ObjectArrayList<KeyValue>(), Collections.<Pair<byte[], byte[]>, IdxIndex>emptyMap());
     }
     return System.currentTimeMillis() - startMillis;
   }
@@ -143,20 +144,19 @@
    * @return the initialized map of builders keyed by column:qualifier pair
    * @throws IOException thrown by {@link IdxColumnDescriptor#getIndexDescriptors(org.apache.hadoop.hbase.HColumnDescriptor)}
    */
-  private Map<Pair<byte[], byte[]>,
-    CompleteIndexBuilder> initIndexTable() throws IOException {
+  private Map<Pair<byte[], byte[]>, CompleteIndexBuilder> initIndexTable()
+    throws IOException {
     Map<Pair<byte[], byte[]>, CompleteIndexBuilder> indexBuilders =
       new HashMap<Pair<byte[], byte[]>, CompleteIndexBuilder>();
-    for (HColumnDescriptor columnDescriptor :
-      region.getRegionInfo().getTableDesc().getColumnFamilies()) {
-      Collection<IdxIndexDescriptor> indexDescriptors =
-        IdxColumnDescriptor.getIndexDescriptors(columnDescriptor).values();
+    for (HColumnDescriptor columnDescriptor : region.getRegionInfo().getTableDesc().getColumnFamilies()) {
+      Collection<IdxIndexDescriptor> indexDescriptors = IdxColumnDescriptor.getIndexDescriptors(columnDescriptor).values();
+
       for (IdxIndexDescriptor indexDescriptor : indexDescriptors) {
-        LOG.info(String.format("Adding index for region: '%s' index: %s",
-          region.getRegionNameAsString(), indexDescriptor.toString()));
-        indexBuilders.put(Pair.of(columnDescriptor.getName(),
-          indexDescriptor.getQualifierName()),
-          new CompleteIndexBuilder(columnDescriptor, indexDescriptor));
+        LOG.info(String.format("Adding index for region: '%s' index: %s", region.getRegionNameAsString(), indexDescriptor.toString()));
+        Pair<byte[], byte[]> key = Pair.of(columnDescriptor.getName(), indexDescriptor.getQualifierName());
+        IdxIndex currentIndex = indexMap != null ? indexMap.get(key) : null;
+        int initialSize = currentIndex == null ? DEFAULT_INITIAL_INDEX_SIZE : (int) Math.round(currentIndex.size() * INDEX_SIZE_GROWTH_FACTOR);
+        indexBuilders.put(key, new CompleteIndexBuilder(columnDescriptor, indexDescriptor, initialSize));
       }
     }
     return indexBuilders;
@@ -174,38 +174,54 @@
     CompleteIndexBuilder> builders) throws IOException {
     ObjectArrayList<KeyValue> newKeys = this.keys == null ?
       new ObjectArrayList<KeyValue>() :
-      new ObjectArrayList<KeyValue>(this.keys.size());
+      // in case we already have keys in the store try to guess the new size
+      new ObjectArrayList<KeyValue>(this.keys.size() + this.region.averageNumberOfMemStoreSKeys() * 2);
 
     StopWatch stopWatch = new StopWatch();
     stopWatch.start();
 
-    InternalScanner scanner = region.getScanner(new Scan());
-    boolean moreRows;
-    int id = 0;
-    do {
-      List<KeyValue> nextRow = new ArrayList<KeyValue>();
-      moreRows = scanner.next(nextRow);
-      if (nextRow.size() > 0) {
-        KeyValue
-          firstOnRow = KeyValue.createFirstOnRow(nextRow.get(0).getRow());
-        newKeys.add(firstOnRow);
-        // add keyvalue to the heapsize
-        heapSize += firstOnRow.heapSize();
-        for (KeyValue keyValue : nextRow) {
-          CompleteIndexBuilder idx = builders.get(Pair.of(keyValue.getFamily(),
-            keyValue.getQualifier()));
-          if (idx != null) {
-            idx.addKeyValue(keyValue, id);
+    InternalScanner scanner = region.getScanner(createScan(builders.keySet()));
+    try {
+      boolean moreRows;
+      int id = 0;
+      do {
+        List<KeyValue> nextRow = new ArrayList<KeyValue>();
+        moreRows = scanner.next(nextRow);
+        if (nextRow.size() > 0) {
+          KeyValue firstOnRow = KeyValue.createFirstOnRow(nextRow.get(0).getRow());
+          newKeys.add(firstOnRow);
+          // add keyvalue to the heapsize
+          heapSize += firstOnRow.heapSize();
+          for (KeyValue keyValue : nextRow) {
+            try {
+              CompleteIndexBuilder idx = builders.get(Pair.of(keyValue.getFamily(),
+                keyValue.getQualifier()));
+              // we must have an index since we've limited the
+              // scan to include only indexed columns
+              assert idx != null;
+              idx.addKeyValue(keyValue, id);
+            } catch (Exception e) {
+              LOG.error("Failed to add " + keyValue + " to the index", e);
+            }
           }
+          id++;
         }
-        id++;
-      }
-    } while (moreRows);
+      } while (moreRows);
+      stopWatch.stop();
+      LOG.info("Filled indices for region: '" + region.getRegionNameAsString()
+        + "' with " + id + " entries in " + stopWatch.toString());
+      return newKeys;
+    } finally {
+      scanner.close();
+    }
+  }
 
-    stopWatch.stop();
-    LOG.info("Filled indices for region: '" + region.getRegionNameAsString()
-      + "' with " + id + " entries in " + stopWatch.toString());
-    return newKeys;
+  private Scan createScan(Set<Pair<byte[], byte[]>> columns) {
+    Scan scan = new Scan();
+    for (Pair<byte[], byte[]> column : columns) {
+      scan.addColumn(column.getFirst(), column.getSecond());
+    }
+    return scan;
   }
 
   /**
@@ -216,25 +232,37 @@
    * @param newKeys  the set of keys for the new index to be finalized
    * @return the new index map
    */
-  private Map<Pair<byte[], byte[]>, IdxIndex>
-  finalizeIndex(Map<Pair<byte[], byte[]>,
+  private Map<Pair<byte[], byte[]>, IdxIndex> finalizeIndex(Map<Pair<byte[], byte[]>,
     CompleteIndexBuilder> builders, ObjectArrayList<KeyValue> newKeys) {
-    Map<Pair<byte[], byte[]>, IdxIndex>
-      newIndexes = new HashMap<Pair<byte[], byte[]>, IdxIndex>();
+    Map<Pair<byte[], byte[]>, IdxIndex> newIndexes = new HashMap<Pair<byte[], byte[]>, IdxIndex>();
     for (Map.Entry<Pair<byte[], byte[]>, CompleteIndexBuilder> indexEntry :
       builders.entrySet()) {
-      IdxIndex index = indexEntry.getValue().finalizeIndex(newKeys.size());
-      newIndexes.put(indexEntry.getKey(), index);
+      final IdxIndex index = indexEntry.getValue().finalizeIndex(newKeys.size());
+      final Pair<byte[], byte[]> key = indexEntry.getKey();
+      newIndexes.put(key, index);
       // adjust the heapsize
-      heapSize += ClassSize.align(ClassSize.MAP_ENTRY +
+      long indexSize = ClassSize.align(ClassSize.MAP_ENTRY +
         ClassSize.align(ClassSize.OBJECT + 2 * ClassSize.ARRAY +
-          indexEntry.getKey().getFirst().length +
-          indexEntry.getKey().getSecond().length) + index.heapSize()
-      );
+          key.getFirst().length +
+          key.getSecond().length) + index.heapSize());
+      LOG.info(String.format("Final index size: %f mb for region: '%s' index: %s",
+        toMb(indexSize), Bytes.toString(key.getFirst()), Bytes.toString(key.getSecond())));
+      heapSize += indexSize;
     }
+    LOG.info(String.format("Total index heap overhead: %f mb for region: '%s'",
+      toMb(heapSize), region.getRegionNameAsString()));
     return newIndexes;
   }
 
+  private double toMb(long bytes) {
+    return bytes / BYTES_IN_MB;
+  }
+
+  /**
+   * Create a new search context.
+   *
+   * @return the new search context.
+   */
   public IdxSearchContext newSearchContext() {
     indexSwitchLock.readLock().lock();
     try {
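
The sizing logic added above starts a new index builder from a fixed default when no previous index exists and otherwise from the previous index size plus roughly ten percent headroom, and it reports heap figures in megabytes. A small standalone sketch of those two calculations, with the constants copied from the patch:

public class IndexSizingSketch {

  static final int DEFAULT_INITIAL_INDEX_SIZE = 1000;
  static final double INDEX_SIZE_GROWTH_FACTOR = 1.1;
  static final double BYTES_IN_MB = 1024D * 1024D;

  /** Initial builder capacity: the previous index size plus ~10% headroom. */
  static int initialSize(Integer previousIndexSize) {
    return previousIndexSize == null
      ? DEFAULT_INITIAL_INDEX_SIZE
      : (int) Math.round(previousIndexSize * INDEX_SIZE_GROWTH_FACTOR);
  }

  /** Converts a heap size in bytes to megabytes for log messages. */
  static double toMb(long bytes) {
    return bytes / BYTES_IN_MB;
  }

  public static void main(String[] args) {
    System.out.println(initialSize(null));                  // 1000
    System.out.println(initialSize(5000));                  // 5500
    System.out.printf("%f mb%n", toMb(3L * 1024 * 1024));   // 3.000000 mb
  }
}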

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java Mon Feb 15 21:00:03 2010
@@ -83,39 +83,31 @@
    *
    * @param regionInfo the region info to create the object name from.
    * @return a valid object name.
+   * @throws IllegalStateException if an error occurs while generating the
+   * object name
    */
-  static ObjectName generateObjectName(HRegionInfo regionInfo) {
+  static ObjectName generateObjectName(HRegionInfo regionInfo)
+      throws IllegalStateException {
     StringBuilder builder =
       new StringBuilder(IdxRegionMBeanImpl.class.getPackage().getName());
     builder.append(':');
     builder.append("table=");
+    // according to HTableDescriptor.isLegalTableName() the table name
+    // will never contain invalid characters
     builder.append(regionInfo.getTableDesc().getNameAsString());
     builder.append(',');
 
     builder.append("id=");
     builder.append(regionInfo.getRegionId());
     builder.append(',');
-
-    if (regionInfo.getStartKey() != null &&
-      regionInfo.getStartKey().length > 0) {
-      builder.append("startKey=");
-      builder.append(Bytes.toString(regionInfo.getStartKey()));
-      builder.append(',');
-    }
-
-    if (regionInfo.getEndKey() != null &&
-      regionInfo.getEndKey().length > 0) {
-      builder.append("endKey=");
-      builder.append(Bytes.toString(regionInfo.getEndKey()));
-      builder.append(',');
-    }
-
     builder.append("type=IdxRegion");
     try {
       return ObjectName.getInstance(builder.toString());
     } catch (MalformedObjectNameException e) {
-      throw new IllegalStateException("Failed to create a legal object name",
-        e);
+      throw new IllegalStateException("Failed to create a legal object name " +
+          "for JMX console.  Generated name was [" + builder.toString() + "]",
+          e
+      );
     }
   }
 

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/idx/support/sets/BitSet.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/idx/support/sets/BitSet.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/idx/support/sets/BitSet.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/idx/support/sets/BitSet.java Mon Feb 15 21:00:03 2010
@@ -177,12 +177,14 @@
    */
   @Override
   public IntSet complement() {
-    size = -1;
-    for (int i = 0; i < words.length; i++) {
-      words[i] = ~words[i];
+    if (capacity > 0) {
+      size = -1;
+      for (int i = 0; i < words.length; i++) {
+        words[i] = ~words[i];
+      }
+      words[words.length - 1] ^= capacity % WORD_BIT_COUNT == 0 ?
+        0 : -1L << capacity;  // get rid of the trailing ones
     }
-    words[words.length - 1] ^= capacity % WORD_BIT_COUNT == 0 ?
-      0 : -1L << capacity;  // get rid of the trailing ones
     return this;
   }
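
The guard added to BitSet.complement() above matters for an empty set: with a capacity of zero the backing long[] has length zero, so unconditionally clearing the trailing bits of words[words.length - 1] would throw ArrayIndexOutOfBoundsException. A stripped-down sketch of the same complement logic over a raw long[] (illustrative only):

public class ComplementSketch {

  private static final int WORD_BIT_COUNT = 64;

  /**
   * Complements the first 'capacity' bits of 'words' in place and clears any
   * bits beyond 'capacity' in the last word. With capacity == 0 the array is
   * empty and the method does nothing, which is the case the patch guards.
   */
  static void complement(long[] words, int capacity) {
    if (capacity > 0) {
      for (int i = 0; i < words.length; i++) {
        words[i] = ~words[i];
      }
      words[words.length - 1] ^= capacity % WORD_BIT_COUNT == 0
        ? 0 : -1L << capacity;  // get rid of the trailing ones
    }
  }

  public static void main(String[] args) {
    long[] words = {0xAL};                 // bits 1 and 3 set
    complement(words, 5);                  // complement within a 5-bit capacity
    System.out.println(Long.toBinaryString(words[0]));  // prints 10101

    complement(new long[0], 0);            // empty set: safely a no-op
  }
}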
 

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/client/idx/TestIdxIndexDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/client/idx/TestIdxIndexDescriptor.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/client/idx/TestIdxIndexDescriptor.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/client/idx/TestIdxIndexDescriptor.java Mon Feb 15 21:00:03 2010
@@ -38,19 +38,59 @@
    * @throws java.io.IOException if an error occurs
    */
   public void testWritable() throws IOException {
-    byte[] qualifierName1 = Bytes.toBytes("qualifer1");
-    IdxIndexDescriptor descriptor
-        = new IdxIndexDescriptor(qualifierName1, IdxQualifierType.INT);
+    final int repeatCount = 3;
+    IdxIndexDescriptor descriptor = createIdxIndexDescriptor();
 
     DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
-    descriptor.write(dataOutputBuffer);
+    for (int i = 0; i < repeatCount; i++) {
+      descriptor.write(dataOutputBuffer);
+    }
 
     DataInputBuffer dataInputBuffer = new DataInputBuffer();
     dataInputBuffer.reset(dataOutputBuffer.getData(), dataOutputBuffer.getLength());
 
-    IdxIndexDescriptor clonedDescriptor = new IdxIndexDescriptor();
-    clonedDescriptor.readFields(dataInputBuffer);
+    for (int i = 0; i < repeatCount; i++) {
+      IdxIndexDescriptor clonedDescriptor = new IdxIndexDescriptor();
+      clonedDescriptor.readFields(dataInputBuffer);
+      Assert.assertEquals("The descriptor was not the same after being written and " +
+        "read attempt=" + i, descriptor, clonedDescriptor);
+    }
+  }
+
+  /**
+   * Tests the equals method.
+   */
+  public void testEquals() {
+    IdxIndexDescriptor ix1 = createIdxIndexDescriptor();
+
+    IdxIndexDescriptor ix2 = createIdxIndexDescriptor();
+    Assert.assertEquals(ix1, ix2);
+
+    ix2.getQualifierName()[0] = 9;
+    Assert.assertFalse(ix1.equals(ix2));
+
+    ix2 = createIdxIndexDescriptor();
+    ix2.setQualifierType(IdxQualifierType.LONG);
+    Assert.assertFalse(ix1.equals(ix2));
 
-    Assert.assertEquals("The descriptor was not the same after being written and read", descriptor, clonedDescriptor);
+    ix2 = createIdxIndexDescriptor();
+    ix2.setOffset(1);
+    Assert.assertFalse(ix1.equals(ix2));
+
+    ix2 = createIdxIndexDescriptor();
+    ix2.setLength(-1);
+    Assert.assertFalse(ix1.equals(ix2));
+
+  }
+
+  private static IdxIndexDescriptor createIdxIndexDescriptor() {
+    byte[] qualifierName1 = Bytes.toBytes("qualifer1");
+    IdxIndexDescriptor descriptor
+      = new IdxIndexDescriptor(qualifierName1, IdxQualifierType.INT);
+    descriptor.setLength(4);
+    descriptor.setOffset(2);
+    return descriptor;
   }
+
+
 }

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestCompleteIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestCompleteIndex.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestCompleteIndex.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestCompleteIndex.java Mon Feb 15 21:00:03 2010
@@ -84,6 +84,31 @@
     Assert.assertTrue(
       bldr.finalizeIndex(NUM_KEY_VALUES).lookup(value).contains(id));
 
+    /**
+     * Test with shorter length.
+     */
+    value = Bytes.toBytes(109L);
+    byte[] intValue = Bytes.toBytes(109);
+    id = 37;
+    bldr = new CompleteIndexBuilder(columnDescriptor,
+      new IdxIndexDescriptor(QUALIFIER, IdxQualifierType.INT, 0, 4));
+    bldr.addKeyValue(new KeyValue(ROW, FAMILY, QUALIFIER, value), id);
+    IdxIndex finalIndex = bldr.finalizeIndex(NUM_KEY_VALUES);
+    Assert.assertEquals(0, finalIndex.lookup(intValue).size());
+    Assert.assertTrue(finalIndex.lookup(Bytes.toBytes(0)).contains(id));
+
+    /**
+     * Test with offset and shorter length.
+     */
+    value = Bytes.toBytes(109L);
+    intValue = Bytes.toBytes(109);
+    id = 39;
+    bldr = new CompleteIndexBuilder(columnDescriptor,
+      new IdxIndexDescriptor(QUALIFIER, IdxQualifierType.INT, 4, 4));
+    bldr.addKeyValue(new KeyValue(ROW, FAMILY, QUALIFIER, value), id);
+    finalIndex = bldr.finalizeIndex(NUM_KEY_VALUES);
+    Assert.assertTrue(finalIndex.lookup(intValue).contains(id));
+    Assert.assertEquals(0, finalIndex.lookup(Bytes.toBytes(0)).size());
   }
 
   /**
@@ -291,17 +316,39 @@
     return TestBitSet.createBitSet(NUM_KEY_VALUES, items);
   }
 
-  private static CompleteIndex fillIndex(long[] values, int[] ids) {
-    Assert.assertEquals(values.length, ids.length);
-    HColumnDescriptor columnDescriptor = new HColumnDescriptor(FAMILY);
+  private static CompleteIndex fillIndex(long[] keys, int[] ids) {
+    return fillIndex(keys, ids, FAMILY, QUALIFIER, NUM_KEY_VALUES);
+  }
+
+  /**
+   * A utility method to create a complete index.
+   *
+   * @param keys the index keys
+   * @param ids the key/value ids
+   * @param family the column family name
+   * @param qualifier the column qualifier name
+   * @param numKeyValues the total number of key/values when finalizing the
+   * index
+   * @return a new, populated index.
+   */
+  static CompleteIndex fillIndex(long[] keys, int[] ids, byte[] family,
+    byte[] qualifier, int numKeyValues) {
+    Assert.assertEquals(keys.length, ids.length);
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor(family);
     CompleteIndexBuilder completeIndex =
       new CompleteIndexBuilder(columnDescriptor,
-        new IdxIndexDescriptor(QUALIFIER, IdxQualifierType.LONG));
-    for (int i = 0; i < values.length; i++) {
-      completeIndex.addKeyValue(new KeyValue(Bytes.toBytes(ids[i]), FAMILY,
-        QUALIFIER, Bytes.toBytes(values[i])), ids[i]);
+        new IdxIndexDescriptor(qualifier, IdxQualifierType.LONG));
+    for (int i = 0; i < keys.length; i++) {
+      completeIndex.addKeyValue(new KeyValue(Bytes.toBytes(ids[i]), family,
+        qualifier, Bytes.toBytes(keys[i])), ids[i]);
+    }
+    CompleteIndex ix = (CompleteIndex) completeIndex.finalizeIndex(numKeyValues);
+    IntSet allIds = ix.all();
+    Assert.assertEquals(ids.length, allIds.size());
+    for (int id : ids) {
+      Assert.assertTrue(allIds.contains(id));
     }
-    return (CompleteIndex) completeIndex.finalizeIndex(NUM_KEY_VALUES);
+    return ix;
   }
 
 }
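
The two new test cases above depend on the offset/length pair in IdxIndexDescriptor slicing the stored 8-byte long before it is read as an int: bytes [0,4) of a big-endian 109L are all zero, while bytes [4,8) are exactly the 4-byte encoding of 109. A quick sketch of that slicing using only JDK classes (ByteBuffer in place of HBase's Bytes utility):

import java.nio.ByteBuffer;
import java.util.Arrays;

public class OffsetLengthSketch {
  public static void main(String[] args) {
    byte[] longBytes = ByteBuffer.allocate(8).putLong(109L).array();

    // offset 0, length 4: the high half of the long, which is zero for 109L
    byte[] head = Arrays.copyOfRange(longBytes, 0, 4);
    System.out.println(ByteBuffer.wrap(head).getInt());   // prints 0

    // offset 4, length 4: the low half, identical to the int encoding of 109
    byte[] tail = Arrays.copyOfRange(longBytes, 4, 8);
    System.out.println(ByteBuffer.wrap(tail).getInt());   // prints 109
  }
}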

Modified: hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestIdxExpressionEvaluator.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestIdxExpressionEvaluator.java?rev=910334&r1=910333&r2=910334&view=diff
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestIdxExpressionEvaluator.java (original)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/test/org/apache/hadoop/hbase/regionserver/TestIdxExpressionEvaluator.java Mon Feb 15 21:00:03 2010
@@ -19,25 +19,32 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import junit.framework.TestCase;
 import junit.framework.Assert;
-import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.Bytes;
+import junit.framework.TestCase;
+import org.apache.hadoop.hbase.client.idx.exp.Comparison;
+import org.apache.hadoop.hbase.client.idx.exp.Expression;
 import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
-//import org.apache.hadoop.hbase.regionserver.idx.support.sets.BitSet;
 import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
-import org.apache.hadoop.hbase.client.idx.exp.Expression;
-import org.apache.hadoop.hbase.client.idx.exp.Comparison;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.TestBitSet;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
 import org.easymock.EasyMock;
 
-import java.util.Map;
 import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Tests the {@link IdxExpressionEvaluator} class.
  */
 public class TestIdxExpressionEvaluator extends TestCase {
+
+  private static final int NUM_KEY_VALUES = 107;
+  private static final byte[] FAMILY = Bytes.toBytes("f");
+  private static final byte[] QUALIFIER = Bytes.toBytes("q");
+
   private TestSearchContext searchContext;
+  private static final long[] KEYS = new long[]{1, 4, 9, 7, 1, 2, 8, 1, 4, 1, 9};
+  private static final int[] IDS = new int[]{1, 7, 8, 32, 40, 66, 67, 80, 86, 90, 106};
 
   @Override
   protected void setUp() throws Exception {
@@ -60,7 +67,7 @@
 
     // perform the test
     IdxExpressionEvaluator evaluator = new IdxExpressionEvaluator();
-    Expression exp = Expression.comparison(column, qualifier, Comparison.Operator.EQ, value);
+    Expression exp = Expression.comparison(column, qualifier, Comparison.Operator.EQ, value, false);
     IntSet intSet = evaluator.evaluate(searchContext, exp);
 
     // assert the evaluator interacted with the indices correctly
@@ -69,6 +76,98 @@
   }
 
   /**
+   * Tests the neq op.
+   */
+  public void testNEQ() {
+    IntSet intSet = evaluateComprison(Comparison.Operator.NEQ, 1L, false);
+    assertSetsEqual(makeSet(7, 8, 32, 66, 67, 86, 106), intSet);
+
+    intSet = evaluateComprison(Comparison.Operator.NEQ, 1L, true);
+    assertSetsEqual(makeSet(IDS).complement().unite(makeSet(7, 8, 32, 66, 67, 86, 106)), intSet);
+  }
+
+  /**
+   * Tests the eq op.
+   */
+  public void testEQ() {
+    IntSet intSet = evaluateComprison(Comparison.Operator.EQ, 4L, false);
+    assertSetsEqual(makeSet(7, 86), intSet);
+
+    intSet = evaluateComprison(Comparison.Operator.EQ, 4L, true);
+    assertSetsEqual(makeSet(IDS).complement().unite(makeSet(7, 86)), intSet);
+  }
+
+  /**
+   * Tests the gt op.
+   */
+  public void testGT() {
+    IntSet intSet = evaluateComprison(Comparison.Operator.GT, 7L, false);
+    assertSetsEqual(makeSet(8, 67, 106), intSet);
+
+    intSet = evaluateComprison(Comparison.Operator.GT, 7L, true);
+    assertSetsEqual(makeSet(IDS).complement().unite(makeSet(8, 67, 106)), intSet);
+
+  }
+
+  /**
+   * Tests the gte op.
+   */
+  public void testGTE() {
+    IntSet intSet = evaluateComprison(Comparison.Operator.GTE, 8L, false);
+    assertSetsEqual(makeSet(8, 67, 106), intSet);
+
+    intSet = evaluateComprison(Comparison.Operator.GTE, 8L, true);
+    assertSetsEqual(makeSet(IDS).complement().unite(makeSet(8, 67, 106)), intSet);
+  }
+
+  /**
+   * Tests the lt op.
+   */
+  public void testLT() {
+    IntSet intSet = evaluateComprison(Comparison.Operator.LT, 3L, false);
+    assertSetsEqual(makeSet(1, 40, 66, 80, 90), intSet);
+
+    intSet = evaluateComprison(Comparison.Operator.LT, 3L, true);
+    assertSetsEqual(makeSet(IDS).complement().unite(makeSet(1, 40, 66, 80, 90)), intSet);
+
+  }
+
+  /**
+   * Tests the lte op.
+   */
+  public void testLTE() {
+    IntSet intSet = evaluateComprison(Comparison.Operator.LTE, 2L, false);
+    assertSetsEqual(makeSet(1, 40, 66, 80, 90), intSet);
+
+    intSet = evaluateComprison(Comparison.Operator.LTE, 2L, true);
+    assertSetsEqual(makeSet(IDS).complement().unite(makeSet(1, 40, 66, 80, 90)), intSet);
+  }
+
+  private IntSet evaluateComprison(Comparison.Operator op, long value, boolean includeMissing) {
+    IdxIndex index = TestCompleteIndex.fillIndex(KEYS, IDS, FAMILY, QUALIFIER, NUM_KEY_VALUES);
+    TestSearchContext sc = new TestSearchContext();
+    sc.indices.put(Pair.of(FAMILY, QUALIFIER), index);
+
+    IdxExpressionEvaluator evaluator = new IdxExpressionEvaluator();
+    return evaluator.evaluate(sc,
+      Expression.comparison(FAMILY, QUALIFIER, op, Bytes.toBytes(value), includeMissing));
+  }
+
+  private static void assertSetsEqual(IntSet is1, IntSet is2) {
+    Assert.assertEquals(is1.capacity(), is2.capacity());
+    Assert.assertEquals(is1.size(), is2.size());
+    IntSet.IntSetIterator iter = is1.iterator();
+    while (iter.hasNext()) {
+      int element = iter.next();
+      Assert.assertTrue("element: " + element, is2.contains(element));
+    }
+  }
+
+  private IntSet makeSet(int... elements) {
+    return TestBitSet.createBitSet(NUM_KEY_VALUES, elements);
+  }
+
+  /**
    * Tests an Or expression containing two comparisons.
    */
   public void testEvaluationOrExpression() {
@@ -77,7 +176,7 @@
     byte[] qualifier1 = Bytes.toBytes("qualifier1");
     byte[] value1 = Bytes.toBytes("value1");
     IdxIndex index1 = EasyMock.createMock(IdxIndex.class);
-    IntSet bitSet1 = new IntSetBuilder().start().addAll(1,2,3,4,5).finish(100);
+    IntSet bitSet1 = new IntSetBuilder().start().addAll(1, 2, 3, 4, 5).finish(100);
     EasyMock.expect(index1.head(value1, false)).andReturn(bitSet1);
     EasyMock.expect(index1.probeToString(value1)).andReturn(Bytes.toString(value1)).anyTimes();
     EasyMock.replay(index1);
@@ -96,8 +195,8 @@
     // perform the test
     IdxExpressionEvaluator evaluator = new IdxExpressionEvaluator();
     Expression exp = Expression.or(
-        Expression.comparison(column1, qualifier1, Comparison.Operator.LT, value1),
-        Expression.comparison(column2, qualifier2, Comparison.Operator.GT, value2)
+      Expression.comparison(column1, qualifier1, Comparison.Operator.LT, value1, false),
+      Expression.comparison(column2, qualifier2, Comparison.Operator.GT, value2, false)
     );
     IntSet intSet = evaluator.evaluate(searchContext, exp);
 
@@ -137,8 +236,8 @@
     // perform the test
     IdxExpressionEvaluator evaluator = new IdxExpressionEvaluator();
     Expression exp = Expression.and(
-        Expression.comparison(column1, qualifier1, Comparison.Operator.LTE, value1),
-        Expression.comparison(column2, qualifier2, Comparison.Operator.GTE, value2)
+      Expression.comparison(column1, qualifier1, Comparison.Operator.LTE, value1, false),
+      Expression.comparison(column2, qualifier2, Comparison.Operator.GTE, value2, false)
     );
     IntSet intSet = evaluator.evaluate(searchContext, exp);
 
@@ -160,7 +259,7 @@
     byte[] column1 = Bytes.toBytes("column1");
     byte[] qualifier1 = Bytes.toBytes("qualifier1");
     byte[] value1 = Bytes.toBytes("value1");
-    IntSet bitSet1 = new IntSetBuilder().start().addAll(1,2,3,4,5,6).finish(100);
+    IntSet bitSet1 = new IntSetBuilder().start().addAll(1, 2, 3, 4, 5, 6).finish(100);
     EasyMock.expect(index1.head(value1, true)).andReturn(bitSet1);
     EasyMock.expect(index1.probeToString(value1)).andReturn(Bytes.toString(value1)).anyTimes();
     EasyMock.replay(index1);
@@ -189,11 +288,11 @@
     // perform the test
     IdxExpressionEvaluator evaluator = new IdxExpressionEvaluator();
     Expression exp = Expression.or(
-        Expression.and(
-            Expression.comparison(column1, qualifier1, Comparison.Operator.LTE, value1),
-            Expression.comparison(column2, qualifier2, Comparison.Operator.GTE, value2)
-        ),
-        Expression.comparison(column3, qualifier3, Comparison.Operator.EQ, value3)
+      Expression.and(
+        Expression.comparison(column1, qualifier1, Comparison.Operator.LTE, value1, false),
+        Expression.comparison(column2, qualifier2, Comparison.Operator.GTE, value2, false)
+      ),
+      Expression.comparison(column3, qualifier3, Comparison.Operator.EQ, value3, false)
     );
 
     IntSet intSet = evaluator.evaluate(searchContext, exp);
@@ -243,7 +342,7 @@
 
     public TestSearchContext() {
       super(null, null);
-      indices = new HashMap<Pair<byte[],byte[]>, IdxIndex>();
+      indices = new HashMap<Pair<byte[], byte[]>, IdxIndex>();
     }
 
     @Override


