hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bryanduxb...@apache.org
Subject svn commit: r636415 - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/ipc/ src/java/org/apache/hadoop/hbase/regionserver/ src/test/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/regionserver/
Date Wed, 12 Mar 2008 16:58:23 GMT
Author: bryanduxbury
Date: Wed Mar 12 09:58:10 2008
New Revision: 636415

URL: http://svn.apache.org/viewvc?rev=636415&view=rev
Log:
HBASE-40 [hbase] Add a method of getting multiple (but not all) cells for a row at once
-new override of getRow added to the whole stack
-test for new override added to TestGet2

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Wed Mar 12 09:58:10 2008
@@ -5,9 +5,11 @@
             0.17.0-dev.2008-02-07_12-01-58. (Tom White via Stack) 
 
   NEW FEATURES
-
+   HBASE-40    Add a method of getting multiple (but not all) cells for a row at once
+   
   OPTIMIZATIONS
-
+   HBASE-430   Performance: Scanners and getRow return maps with duplicate data
+   
   BUG FIXES
    HBASE-19    CountingBloomFilter can overflow its storage
                (Stu Hood and Bryan Duxbury via Stack)

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java Wed Mar 12 09:58:10 2008
@@ -362,6 +362,50 @@
   }
 
   /** 
+   * Get selected columns for the specified row at the latest timestamp
+   * 
+   * @param row row key
+   * @param columns Array of column names you want to retrieve.
+   * @return Map of columns to values.  Map is empty if row does not exist.
+   * @throws IOException
+   */
+  public SortedMap<Text, Cell> getRow(final Text row, final Text[] columns) 
+  throws IOException {
+    return getRow(row, columns, HConstants.LATEST_TIMESTAMP);
+  }
+
+  /** 
+   * Get selected columns for the specified row at a specified timestamp
+   * 
+   * @param row row key
+   * @param columns Array of column names you want to retrieve.   
+   * @param ts timestamp
+   * @return Map of columns to values.  Map is empty if row does not exist.
+   * @throws IOException
+   */
+  public SortedMap<Text, Cell> getRow(final Text row, final Text[] columns, 
+    final long ts) 
+  throws IOException {
+    HbaseMapWritable value = null;
+         
+    value = getRegionServerWithRetries(new ServerCallable<HbaseMapWritable>(row) {
+      public HbaseMapWritable call() throws IOException {
+        return server.getRow(location.getRegionInfo().getRegionName(), row, 
+          columns, ts);
+      }
+    });
+    
+    SortedMap<Text, Cell> results = new TreeMap<Text, Cell>();
+    if (value != null && value.size() != 0) {
+      for (Map.Entry<Writable, Writable> e: value.entrySet()) {
+        HStoreKey key = (HStoreKey) e.getKey();
+        results.put(key.getColumn(), (Cell)e.getValue());
+      }
+    }
+    return results;
+  }
+
+  /** 
    * Get a scanner on the current table starting at the specified row.
    * Return the specified columns.
    *

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java Wed Mar 12 09:58:10 2008
@@ -93,17 +93,6 @@
   throws IOException;
   
   /**
-   * Get all the data for the specified row
-   * 
-   * @param regionName region name
-   * @param row row key
-   * @return map of values
-   * @throws IOException
-   */
-  public HbaseMapWritable getRow(final Text regionName, final Text row)
-  throws IOException;
-
-  /**
    * Get all the data for the specified row at a given timestamp
    * 
    * @param regionName region name
@@ -138,6 +127,30 @@
    */
   public HbaseMapWritable getClosestRowBefore(final Text regionName, 
     final Text row, final long ts)
+  throws IOException;
+
+  /**
+   * Get selected columns for the specified row at a given timestamp.
+   * 
+   * @param regionName region name
+   * @param row row key
+   * @return map of values
+   * @throws IOException
+   */
+  public HbaseMapWritable getRow(final Text regionName, final Text row, 
+    final Text[] columns, final long ts)
+  throws IOException;
+
+  /**
+   * Get selected columns for the specified row at the latest timestamp.
+   * 
+   * @param regionName region name
+   * @param row row key
+   * @return map of values
+   * @throws IOException
+   */
+  public HbaseMapWritable getRow(final Text regionName, final Text row, 
+    final Text[] columns)
   throws IOException;
 
   /**

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Wed Mar 12 09:58:10 2008
@@ -24,6 +24,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Random;
 import java.util.SortedMap;
 import java.util.TreeMap;
@@ -1068,24 +1069,6 @@
   }
 
   /**
-   * Fetch all the columns for the indicated row.
-   * Returns a TreeMap that maps column names to values.
-   *
-   * We should eventually use Bloom filters here, to reduce running time.  If 
-   * the database has many column families and is very sparse, then we could be 
-   * checking many files needlessly.  A small Bloom for each row would help us 
-   * determine which column groups are useful for that row.  That would let us 
-   * avoid a bunch of disk activity.
-   *
-   * @param row
-   * @return Map<columnName, byte[]> values
-   * @throws IOException
-   */
-  public Map<Text, Cell> getFull(Text row) throws IOException {
-    return getFull(row, HConstants.LATEST_TIMESTAMP);
-  }
-
-  /**
    * Fetch all the columns for the indicated row at a specified timestamp.
    * Returns a TreeMap that maps column names to values.
    *
@@ -1096,18 +1079,21 @@
    * avoid a bunch of disk activity.
    *
    * @param row
+   * @param columns Array of columns you'd like to retrieve. When null, get all.
    * @param ts
-   * @return Map<columnName, byte[]> values
+   * @return Map<columnName, Cell> values
    * @throws IOException
    */
-  public Map<Text, Cell> getFull(Text row, long ts) throws IOException {
+  public Map<Text, Cell> getFull(final Text row, final Set<Text> columns, 
+    final long ts) 
+  throws IOException {
     HStoreKey key = new HStoreKey(row, ts);
     obtainRowLock(row);
     try {
       TreeMap<Text, Cell> result = new TreeMap<Text, Cell>();
       for (Text colFamily: stores.keySet()) {
         HStore targetStore = stores.get(colFamily);
-        targetStore.getFull(key, result);
+        targetStore.getFull(key, columns, result);
       }
       return result;
     } finally {
@@ -1162,7 +1148,7 @@
       TreeMap<Text, Cell> result = new TreeMap<Text, Cell>();
       for (Text colFamily: stores.keySet()) {
         HStore targetStore = stores.get(colFamily);
-        targetStore.getFull(key, result);
+        targetStore.getFull(key, null, result);
       }
       
       return result;

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Mar 12 09:58:10 2008
@@ -24,6 +24,7 @@
 import java.lang.reflect.Constructor;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
+import java.util.Arrays;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -948,20 +949,34 @@
   }
 
   /** {@inheritDoc} */
-  public HbaseMapWritable getRow(final Text regionName, final Text row)
+  public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts)
   throws IOException {
-    return getRow(regionName, row, HConstants.LATEST_TIMESTAMP);
+    return getRow(regionName, row, null, ts);
   }
 
   /** {@inheritDoc} */
-  public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts)
+  public HbaseMapWritable getRow(final Text regionName, final Text row, 
+    final Text[] columns)
+  throws IOException {
+    return getRow(regionName, row, columns, HConstants.LATEST_TIMESTAMP);
+  }
+
+  /** {@inheritDoc} */
+  public HbaseMapWritable getRow(final Text regionName, final Text row, 
+    final Text[] columns, final long ts)
   throws IOException {
     checkOpen();
     requestCount.incrementAndGet();
     try {
+      // convert the columns array into a set so it's easy to check later.
+      Set<Text> columnSet = new HashSet<Text>();
+      if (columns != null) {
+        columnSet.addAll(Arrays.asList(columns));
+      }
+      
       HRegion region = getRegion(regionName);
+      Map<Text, Cell> map = region.getFull(row, columnSet, ts);
       HbaseMapWritable result = new HbaseMapWritable();
-      Map<Text, Cell> map = region.getFull(row, ts);
       for (Map.Entry<Text, Cell> es: map.entrySet()) {
         result.put(new HStoreKey(row, es.getKey()), es.getValue());
       }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Wed Mar 12 09:58:10 2008
@@ -27,6 +27,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.Map.Entry;
@@ -184,14 +185,14 @@
      * @param key
      * @param results
      */
-    void getFull(HStoreKey key, SortedMap<Text, Cell> results) {
-    void getFull(HStoreKey key, Set<Text> columns, SortedMap<Text, Cell> results) {
       this.lock.readLock().lock();
       try {
         synchronized (memcache) {
-          internalGetFull(memcache, key, results);
+          internalGetFull(memcache, key, columns, results);
         }
         synchronized (snapshot) {
-          internalGetFull(snapshot, key, results);
+          internalGetFull(snapshot, key, columns, results);
         }
 
       } finally {
@@ -200,7 +201,7 @@
     }
 
     private void internalGetFull(SortedMap<HStoreKey, byte[]> map, HStoreKey key, 
-        SortedMap<Text, Cell> results) {
+      Set<Text> columns, SortedMap<Text, Cell> results) {
 
       if (map.isEmpty() || key == null) {
         return;
@@ -214,7 +215,9 @@
           byte [] val = tailMap.get(itKey);
 
           if (!HLogEdit.isDeleted(val)) {
-            results.put(itCol, new Cell(val, itKey.getTimestamp()));
+            if (columns == null || columns.contains(itKey.getColumn())) {
+              results.put(itCol, new Cell(val, itKey.getTimestamp()));
+            }
           }
 
         } else if (key.getRow().compareTo(itKey.getRow()) < 0) {
@@ -1601,24 +1604,36 @@
    * Return all the available columns for the given key.  The key indicates a 
    * row and timestamp, but not a column name.
    *
-   * The returned object should map column names to byte arrays (byte[]).
+   * The returned object should map column names to Cells.
    */
-  void getFull(HStoreKey key, TreeMap<Text, Cell> results)
-    throws IOException {
+  void getFull(HStoreKey key, final Set<Text> columns, TreeMap<Text, Cell> results)
+  throws IOException {
     Map<Text, List<Long>> deletes = new HashMap<Text, List<Long>>();
     
+    // if the key is null, we're not even looking for anything. return.
     if (key == null) {
       return;
     }
     
     this.lock.readLock().lock();
-    memcache.getFull(key, results);
+    
+    // get from the memcache first.
+    memcache.getFull(key, columns, results);
+    
     try {
       MapFile.Reader[] maparray = getReaders();
+      
+      // examine each mapfile
       for (int i = maparray.length - 1; i >= 0; i--) {
         MapFile.Reader map = maparray[i];
+        
+        // synchronize on the map so that no one else iterates it at the same 
+        // time
         synchronized(map) {
+          // seek back to the beginning
           map.reset();
+          
+          // seek to the closest key that should match the row we're looking for
           ImmutableBytesWritable readval = new ImmutableBytesWritable();
           HStoreKey readkey = (HStoreKey)map.getClosest(key, readval);
           if (readkey == null) {
@@ -1631,7 +1646,9 @@
               if(isDeleted(readkey, readval.get(), true, deletes)) {
                 break;
               }
-              results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp()));
+              if (columns == null || columns.contains(readkey.getColumn())) {
+                results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp()));
+              }
               readval = new ImmutableBytesWritable();
             } else if(key.getRow().compareTo(readkey.getRow()) < 0) {
               break;

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java Wed Mar 12 09:58:10 2008
@@ -496,7 +496,7 @@
      * @throws IOException
      */
     public Map<Text, Cell> getFull(Text row) throws IOException {
-      return region.getFull(row);
+      return region.getFull(row, null, HConstants.LATEST_TIMESTAMP);
     }
     /** {@inheritDoc} */
     public void flushcache() throws IOException {
@@ -567,7 +567,7 @@
   protected void assertCellEquals(final HRegion region, final Text row,
     final Text column, final long timestamp, final String value)
   throws IOException {
-    Map<Text, Cell> result = region.getFull(row, timestamp);
+    Map<Text, Cell> result = region.getFull(row, null, timestamp);
     Cell cell_value = result.get(column);
     if(value == null){
       assertEquals(column.toString() + " at timestamp " + timestamp, null, cell_value);

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java Wed Mar 12 09:58:10 2008
@@ -21,6 +21,7 @@
 
 import java.io.IOException;
 import java.util.Map;
+import java.util.HashSet;
 import java.util.TreeMap;
 
 import org.apache.hadoop.dfs.MiniDFSCluster;
@@ -33,6 +34,7 @@
 import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.io.Cell;
+import org.apache.hadoop.hbase.io.BatchUpdate;
 
 /**
  * {@link TestGet} is a medley of tests of get all done up as a single test.
@@ -224,10 +226,84 @@
       }
     }
   }
+
+  /**
+   * For HBASE-40
+   */
+  public void testGetFullWithSpecifiedColumns() throws IOException {
+    HRegion region = null;
+    HRegionIncommon region_incommon = null;
+    try {
+      HTableDescriptor htd = createTableDescriptor(getName());
+      region = createNewHRegion(htd, null, null);
+      region_incommon = new HRegionIncommon(region);
+      
+      // write a row with a bunch of columns
+      Text row = new Text("some_row");
+      BatchUpdate bu = new BatchUpdate(row);
+      bu.put(COLUMNS[0], "column 0".getBytes());
+      bu.put(COLUMNS[1], "column 1".getBytes());
+      bu.put(COLUMNS[2], "column 2".getBytes());
+      region.batchUpdate(bu);
+      
+      assertSpecifiedColumns(region, row);
+      // try it again with a cache flush to involve the store, not just the 
+      // memcache.
+      region_incommon.flushcache();
+      assertSpecifiedColumns(region, row);
+      
+    } finally {
+      if (region != null) {
+        try {
+          region.close();
+        } catch (Exception e) {
+          e.printStackTrace();
+        }
+        region.getLog().closeAndDelete();
+      }
+    }    
+  }
+    
+  private void assertSpecifiedColumns(final HRegion region, final Text row) 
+  throws IOException {
+    HashSet<Text> all = new HashSet<Text>();
+    HashSet<Text> one = new HashSet<Text>();
+    HashSet<Text> none = new HashSet<Text>();
+    
+    all.add(COLUMNS[0]);
+    all.add(COLUMNS[1]);
+    all.add(COLUMNS[2]);      
+    one.add(COLUMNS[0]);
+
+    // make sure we get all of them with standard getFull
+    Map<Text, Cell> result = region.getFull(row, null, 
+      HConstants.LATEST_TIMESTAMP);
+    assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
+    assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1");
+    assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2");
+          
+    // try to get just one
+    result = region.getFull(row, one, HConstants.LATEST_TIMESTAMP);
+    assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
+    assertNull(result.get(COLUMNS[1]));                                   
+    assertNull(result.get(COLUMNS[2]));                                   
+                                                                          
+    // try to get all of them (specified)                                 
+    result = region.getFull(row, all, HConstants.LATEST_TIMESTAMP);       
+    assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0");
+    assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1");
+    assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2");
     
+    // try to get none with empty column set
+    result = region.getFull(row, none, HConstants.LATEST_TIMESTAMP);
+    assertNull(result.get(COLUMNS[0]));
+    assertNull(result.get(COLUMNS[1]));
+    assertNull(result.get(COLUMNS[2]));    
+  }  
+  
   private void assertColumnsPresent(final HRegion r, final Text row)
   throws IOException {
-    Map<Text, Cell> result = r.getFull(row);
+    Map<Text, Cell> result = r.getFull(row, null, HConstants.LATEST_TIMESTAMP);
     int columnCount = 0;
     for (Map.Entry<Text, Cell> e: result.entrySet()) {
       columnCount++;

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java?rev=636415&r1=636414&r2=636415&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java Wed Mar 12 09:58:10 2008
@@ -136,7 +136,7 @@
     for (int i = 0; i < ROW_COUNT; i++) {
       HStoreKey hsk = new HStoreKey(getRowName(i));
       TreeMap<Text, Cell> all = new TreeMap<Text, Cell>();
-      this.hmemcache.getFull(hsk, all);
+      this.hmemcache.getFull(hsk, null, all);
       isExpectedRow(i, all);
     }
   }



Mime
View raw message