hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From li...@apache.org
Subject svn commit: r1560792 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/filter/ main/java/org/apache/hadoop/hbase/io/ test/java/org/apache/hadoop/hbase/filter/
Date Thu, 23 Jan 2014 19:18:23 GMT
Author: liyin
Date: Thu Jan 23 19:18:23 2014
New Revision: 1560792

URL: http://svn.apache.org/r1560792
Log:
[HBASE-10027] Adding CompoundRowPrefixFilter, a more efficient way to perform batch prefix
gets.

Author: manukranthk

Summary: The idea is to take a list of prefixes and scan the region to fetch the rows from
the region which match the given list of prefixes.

Test Plan: Unit tests

Reviewers: liyintang, adela, aaiyer

Reviewed By: liyintang

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D1137373

Task ID: 3548003

Added:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/filter/CompoundRowPrefixFilter.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/filter/TestCompoundRowPrefixFilter.java
Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/filter/CompoundRowPrefixFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/filter/CompoundRowPrefixFilter.java?rev=1560792&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/filter/CompoundRowPrefixFilter.java
(added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/filter/CompoundRowPrefixFilter.java
Thu Jan 23 19:18:23 2014
@@ -0,0 +1,197 @@
+/*
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.TreeSet;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Row filter that includes only rows whose key starts with one of a fixed set
+ * of row prefixes.
+ * <p>
+ * The prefixes are kept sorted with {@link Bytes#BYTES_COMPARATOR}. The filter
+ * tracks one "current" prefix and moves on to the next one whenever a row sorts
+ * after the current prefix without matching it, so it assumes rows are offered
+ * in increasing order. When a row sorts before the current prefix the filter
+ * answers {@link ReturnCode#SEEK_NEXT_USING_HINT} and {@link #getNextKeyHint}
+ * points at the first possible key of that prefix.
+ * <p>
+ * Instances are created through {@link Builder}; the no-arg constructor exists
+ * for deserialization only.
+ */
+public class CompoundRowPrefixFilter extends FilterBase {
+  /** Row prefixes to match, sorted with {@link Bytes#BYTES_COMPARATOR}. */
+  private List<byte[]> rowPrefixes;
+  /** Prefix currently being matched (element curPrefixIndex of rowPrefixes). */
+  private byte[] curRowPrefix;
+  /** Index of the current prefix within rowPrefixes. */
+  private int curPrefixIndex;
+  /** Set once a row has sorted past the last prefix without matching it. */
+  private boolean done = false;
+
+  public CompoundRowPrefixFilter() {
+    // Used for serialization and deserialization directly.
+    // Not to be used otherwise.
+  }
+
+  /**
+   * Constructor for CompoundRowPrefixFilter which takes a list of row prefixes
+   * @param rowPrefixes : Assuming that one row prefix is not a prefix of
+   * another. In which case, we can remove the later from the list.
+   * The list is sorted in place and retained by the filter.
+   * TODO : Do this here and remove this assumption.
+   */
+  private CompoundRowPrefixFilter(List<byte[]> rowPrefixes) {
+    Preconditions.checkArgument(rowPrefixes.size() > 0,
+        "Requires atleast one row prefix to initialize.");
+    this.rowPrefixes = rowPrefixes;
+    // Sort first: resetFilterProgress() latches element 0 as the current
+    // prefix, so element 0 must already be the smallest prefix.
+    Collections.sort(this.rowPrefixes, Bytes.BYTES_COMPARATOR);
+    this.resetFilterProgress();
+  }
+
+  /**
+   * Resets the filter by resetting the state of the filter to the starting
+   * configuration: the first (smallest) prefix becomes current again and the
+   * done flag is cleared.
+   */
+  public void resetFilterProgress() {
+    this.curPrefixIndex = 0;
+    this.curRowPrefix = rowPrefixes.get(this.curPrefixIndex);
+    this.done = false;
+  }
+
+  /**
+   * @return true once a row has sorted past the last prefix, i.e. no further
+   * row can be included.
+   */
+  @Override
+  public boolean filterAllRemaining() {
+    return this.done;
+  }
+
+  /**
+   * Advances the filter to the next row prefix, if there is one.
+   * @return true if the current prefix was advanced; false if the current
+   * prefix was already the last one, in which case the filter is marked done.
+   */
+  private boolean checkAndUpdateNextPrefix() {
+    if ((this.curPrefixIndex + 1) >= this.rowPrefixes.size()) {
+      done = true;
+      return false;
+    }
+    this.curRowPrefix = this.rowPrefixes.get(++this.curPrefixIndex);
+    return true;
+  }
+
+  /**
+   * Decides what to do with a KeyValue based on its row key.
+   * @param currentKv the KeyValue being examined
+   * @return INCLUDE if the row starts with the current prefix (after skipping
+   * over prefixes the row has already sorted past); SKIP if the row is past
+   * the last prefix; SEEK_NEXT_USING_HINT otherwise.
+   */
+  @Override
+  public ReturnCode filterKeyValue(KeyValue currentKv) {
+    // Fetch the row once; it does not change while we walk the prefixes.
+    final byte[] row = currentKv.getRow();
+    while (true) {
+      if (Bytes.startsWith(row, this.curRowPrefix)) {
+        return ReturnCode.INCLUDE;
+      }
+      if (Bytes.compareTo(row, this.curRowPrefix) <= 0) {
+        // Row sorts before the current prefix: the scanner has to seek.
+        break;
+      }
+      // Covered this RowPrefix, hence we can proceed to the next rowPrefix
+      // in the list
+      if (!this.checkAndUpdateNextPrefix()) break;
+    }
+    if (done) return ReturnCode.SKIP;
+    return ReturnCode.SEEK_NEXT_USING_HINT;
+  }
+
+  /**
+   * @param currentKV not used
+   * @return the first possible KeyValue on the row equal to the current prefix
+   */
+  @Override
+  public KeyValue getNextKeyHint(KeyValue currentKV) {
+    return KeyValue.createFirstOnRow(this.curRowPrefix);
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("CompoundRowFilter : ");
+    sb.append("Internal State : [ ");
+    // A default-constructed filter has no prefixes until readFields() runs.
+    sb.append(" curRowPrefix : "
+        + (curRowPrefix == null ? "null" : Bytes.toStringBinary(curRowPrefix)));
+    sb.append(" curPrefixIndex : " + this.curPrefixIndex);
+    sb.append(" done : " + this.done + " ], rowPrefixes : [ ");
+    if (this.rowPrefixes != null) {
+      for (byte[] arr : this.rowPrefixes) {
+        sb.append(Bytes.toStringBinary(arr) + ", ");
+      }
+    }
+    sb.append(" ]");
+    return sb.toString();
+  }
+
+  /**
+   * Reads the prefix list written by {@link #write(DataOutput)} and resets the
+   * filter to its starting state so that it is usable right away.
+   * @throws IOException if the stream cannot be read or announces fewer than
+   * one prefix
+   */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    int numRowPrefixes = in.readInt();
+    if (numRowPrefixes < 1) {
+      throw new IOException(
+          "Invalid number of row prefixes: " + numRowPrefixes);
+    }
+    List<byte[]> prefixes = new ArrayList<byte[]>(numRowPrefixes);
+    for (int i = 0; i < numRowPrefixes; i++) {
+      prefixes.add(Bytes.readByteArray(in));
+    }
+    // Do not rely on the writer having sorted the prefixes.
+    Collections.sort(prefixes, Bytes.BYTES_COMPARATOR);
+    this.rowPrefixes = prefixes;
+    // Without this the current prefix stays null after deserialization.
+    this.resetFilterProgress();
+  }
+
+  /**
+   * Writes the number of prefixes followed by each prefix as a byte array.
+   */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(this.rowPrefixes.size());
+    for (byte[] row : this.rowPrefixes) {
+      Bytes.writeByteArray(out, row);
+    }
+  }
+
+  /**
+   * Collects the given prefixes into a set ordered and de-duplicated by
+   * {@link Bytes#BYTES_COMPARATOR}, i.e. by array content.
+   */
+  private static TreeSet<byte[]> toPrefixSet(List<byte[]> prefixes) {
+    TreeSet<byte[]> set = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+    set.addAll(prefixes);
+    return set;
+  }
+
+  /**
+   * Hashes the content of the distinct prefixes in sorted order, so that
+   * filters which are {@link #equals(Object) equal} share a hash code.
+   */
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    if (rowPrefixes == null) {
+      return prime * result;
+    }
+    // List.hashCode() would hash each byte[] by identity, not by content.
+    for (byte[] prefix : toPrefixSet(rowPrefixes)) {
+      result = prime * result + Arrays.hashCode(prefix);
+    }
+    return result;
+  }
+
+  /**
+   * Two filters are equal when they hold the same set of row prefixes,
+   * compared by content and regardless of order. Scan progress is ignored.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    CompoundRowPrefixFilter other = (CompoundRowPrefixFilter) obj;
+    if (rowPrefixes == null || other.rowPrefixes == null) {
+      return rowPrefixes == other.rowPrefixes;
+    }
+    TreeSet<byte[]> mine = toPrefixSet(rowPrefixes);
+    TreeSet<byte[]> theirs = toPrefixSet(other.rowPrefixes);
+    // Same size plus containment in one direction gives set equality.
+    return mine.size() == theirs.size() && mine.containsAll(theirs);
+  }
+
+  /**
+   * Collects row prefixes and builds a {@link CompoundRowPrefixFilter}.
+   */
+  public static class Builder {
+    private List<byte[]> underlyingArray;
+    public Builder() {
+      this.underlyingArray = new ArrayList<byte[]>();
+    }
+
+    /**
+     * Adds a copy of arr to the internal list.
+     * @param arr row prefix to add
+     * @return this builder
+     */
+    public Builder addRowPrefix(byte[] arr) {
+      this.underlyingArray.add(Arrays.copyOf(arr, arr.length));
+      return this;
+    }
+
+    /**
+     * Builds a filter from the prefixes added so far.
+     * @return a new filter
+     * @throws IllegalArgumentException if no prefix has been added
+     */
+    public CompoundRowPrefixFilter create() {
+      // Hand over a copy: the filter sorts and keeps its list, and later
+      // addRowPrefix() calls must not alter a filter that was already built.
+      return new CompoundRowPrefixFilter(
+          new ArrayList<byte[]>(this.underlyingArray));
+    }
+  }
+}

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java?rev=1560792&r1=1560791&r2=1560792&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java
Thu Jan 23 19:18:23 2014
@@ -57,6 +57,7 @@ import org.apache.hadoop.hbase.filter.Co
 import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
 import org.apache.hadoop.hbase.filter.ColumnRangeFilter;
 import org.apache.hadoop.hbase.filter.CompareFilter;
+import org.apache.hadoop.hbase.filter.CompoundRowPrefixFilter;
 import org.apache.hadoop.hbase.filter.DependentColumnFilter;
 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
 import org.apache.hadoop.hbase.filter.InclusiveStopFilter;
@@ -218,6 +219,7 @@ public class HbaseObjectWritable impleme
     addToMap(MultiAction.class, code++);
     addToMap(MultiResponse.class, code++);
     addToMap(HFileHistogram.Bucket.class, code++);
+    addToMap(CompoundRowPrefixFilter.class, code++);
   }
 
   private Class<?> declaredClass;

Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/filter/TestCompoundRowPrefixFilter.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/filter/TestCompoundRowPrefixFilter.java?rev=1560792&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/filter/TestCompoundRowPrefixFilter.java
(added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/filter/TestCompoundRowPrefixFilter.java
Thu Jan 23 19:18:23 2014
@@ -0,0 +1,199 @@
+/*
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestCompoundRowPrefixFilter {
+
+  private byte[][] PREFIXES = new byte[][] {
+      Bytes.toBytes("com.abc.news"),
+      Bytes.toBytes("com.google.news"),
+      Bytes.toBytes("com.fb.apps")
+  };
+
+  private byte[][] keys = new byte[][] {
+      Bytes.toBytes("com.abc.ads"),
+      Bytes.toBytes("com.abc.news.india"),
+      Bytes.toBytes("com.abc.news.us"),
+      Bytes.toBytes("com.fb.ads"),
+      Bytes.toBytes("com.fb.ads.feed"),
+      Bytes.toBytes("com.fb.apps.bitcoins"),
+      Bytes.toBytes("com.google.news.india"),
+      Bytes.toBytes("com.zee.news.us")
+  };
+  private byte[] tableName = Bytes.toBytes("testCompoundRowPrefixFilter");
+  private byte[] familyName = Bytes.toBytes("cf");
+  private HRegion region = null;
+  private Configuration conf = null;
+  private Path testDir = null;
+
+  private static HBaseTestingUtility util = new HBaseTestingUtility();
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    util.startMiniCluster();
+  }
+
+  public void setup() throws IOException {
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    htd.addFamily(new HColumnDescriptor(familyName));
+    HRegionInfo info = new HRegionInfo(htd, null, null, false);
+    this.testDir = util.getTestDir(Bytes.toString(tableName));
+    this.conf = util.getConfiguration();
+    this.region = HRegion.createHRegion(info, testDir, this.conf);
+    this.region.flushcache();
+    loadData();
+  }
+
+  private void loadData() throws IOException {
+    for (byte[] key : keys) {
+      Put p = new Put(key);
+      p.add(familyName, null, key);
+      region.put(p);
+    }
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    util.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testSerDe() throws IOException {
+    CompoundRowPrefixFilter filter = new CompoundRowPrefixFilter.Builder()
+      .addRowPrefix(PREFIXES[0])
+      .addRowPrefix(PREFIXES[1])
+      .addRowPrefix(PREFIXES[2])
+      .create();
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    filter.write(dos);
+    dos.close();
+    byte[] byteArray = baos.toByteArray();
+    ByteArrayInputStream bais = new ByteArrayInputStream(byteArray);
+    DataInputStream dis = new DataInputStream(bais);
+    CompoundRowPrefixFilter deserializedFilter = new CompoundRowPrefixFilter();
+    deserializedFilter.readFields(dis);
+    assertTrue(filter.equals(deserializedFilter));
+  }
+
+  private Filter createFilter() {
+    Filter filter = new CompoundRowPrefixFilter.Builder()
+    .addRowPrefix(PREFIXES[0])
+    .addRowPrefix(PREFIXES[1])
+    .addRowPrefix(PREFIXES[2])
+    .create();
+    return filter;
+  }
+
+  @Test
+  public void testFilterBehaviour () {
+    Filter filter = createFilter();
+
+    assertTrue(!filter.filterRowKey(keys[0], 0, keys[0].length));
+    assertTrue(!filter.filterAllRemaining());
+    assertTrue(filter.filterKeyValue(KeyValue.createFirstOnRow(keys[0])) ==
+        ReturnCode.SEEK_NEXT_USING_HINT);
+    assertTrue(Bytes.compareTo(
+        filter.getNextKeyHint(KeyValue.createFirstOnRow(keys[0])).getRow(),
+        PREFIXES[0]) == 0);
+    assertTrue(filter.filterKeyValue(KeyValue.createFirstOnRow(keys[1])) ==
+        ReturnCode.INCLUDE);
+    assertTrue(filter.filterKeyValue(KeyValue.createFirstOnRow(keys[2])) ==
+        ReturnCode.INCLUDE);
+    assertTrue(!filter.filterAllRemaining());
+    assertTrue(filter.filterKeyValue(KeyValue.createFirstOnRow(keys[3])) ==
+        ReturnCode.SEEK_NEXT_USING_HINT);
+    assertTrue(Bytes.compareTo(filter.getNextKeyHint(
+        KeyValue.createFirstOnRow(keys[3])).getRow(), PREFIXES[2]) == 0);
+    assertTrue(filter.filterKeyValue(KeyValue.createFirstOnRow(keys[5]))
+        == ReturnCode.INCLUDE);
+    assertTrue(filter.filterKeyValue(KeyValue.createFirstOnRow(keys[6]))
+        == ReturnCode.INCLUDE);
+    assertTrue(Bytes.compareTo(filter.getNextKeyHint(
+        KeyValue.createFirstOnRow(keys[6])).getRow(), PREFIXES[1]) == 0);
+    assertTrue(filter.filterKeyValue(KeyValue.createFirstOnRow(keys[7]))
+        == ReturnCode.SKIP);
+    assertTrue(filter.filterAllRemaining());
+  }
+
+  @Test
+  public void testScannerBehavior() throws IOException {
+    setup();
+    Scan s = new Scan();
+    s.setStartRow(Bytes.toBytes(""));
+    s.setStopRow(Bytes.toBytes("zzz"));
+    Filter f = createFilter();
+    s.setFilter(f);
+    InternalScanner scanner = this.region.getScanner(s);
+    verifyScan(scanner);
+  }
+
+  private void verifyScan(InternalScanner scanner) throws IOException {
+    List<KeyValue> results = new ArrayList<KeyValue>();
+
+    scanner.next(results);
+    assertTrue(results.size() == 1);
+    assertTrue(Bytes.compareTo(results.get(0).getRow(), keys[1]) == 0);
+    results.clear();
+
+    scanner.next(results);
+    assertTrue(results.size() == 1);
+    assertTrue(Bytes.compareTo(results.get(0).getRow(), keys[2]) == 0);
+    results.clear();
+
+    scanner.next(results);
+    assertTrue(results.size() == 1);
+    assertTrue(Bytes.compareTo(results.get(0).getRow(), keys[5]) == 0);
+    results.clear();
+
+    scanner.next(results);
+    assertTrue(results.size() == 1);
+    assertTrue(Bytes.compareTo(results.get(0).getRow(), keys[6]) == 0);
+    results.clear();
+  }
+}



Mime
View raw message