Return-Path: Delivered-To: apmail-hadoop-hbase-commits-archive@locus.apache.org Received: (qmail 16397 invoked from network); 12 Mar 2008 16:58:48 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 12 Mar 2008 16:58:48 -0000 Received: (qmail 19795 invoked by uid 500); 12 Mar 2008 16:58:45 -0000 Delivered-To: apmail-hadoop-hbase-commits-archive@hadoop.apache.org Received: (qmail 19777 invoked by uid 500); 12 Mar 2008 16:58:45 -0000 Mailing-List: contact hbase-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hbase-dev@hadoop.apache.org Delivered-To: mailing list hbase-commits@hadoop.apache.org Received: (qmail 19743 invoked by uid 99); 12 Mar 2008 16:58:45 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 12 Mar 2008 09:58:45 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 12 Mar 2008 16:58:16 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id AEB111A9832; Wed, 12 Mar 2008 09:58:26 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r636415 - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/ipc/ src/java/org/apache/hadoop/hbase/regionserver/ src/test/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/regionserver/ Date: Wed, 12 Mar 2008 16:58:23 -0000 To: hbase-commits@hadoop.apache.org From: bryanduxbury@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080312165826.AEB111A9832@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: bryanduxbury Date: Wed Mar 12 09:58:10 2008 New Revision: 636415 URL: 
http://svn.apache.org/viewvc?rev=636415&view=rev Log: HBASE-40 [hbase] Add a method of getting multiple (but not all) cells for a row at once -new override of getRow added to the whole stack -test for new override added to TestGet2 Modified: hadoop/hbase/trunk/CHANGES.txt hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java Modified: hadoop/hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/CHANGES.txt (original) +++ hadoop/hbase/trunk/CHANGES.txt Wed Mar 12 09:58:10 2008 @@ -5,9 +5,11 @@ 0.17.0-dev.2008-02-07_12-01-58. 
(Tom White via Stack) NEW FEATURES - + HBASE-40 Add a method of getting multiple (but not all) cells for a row at once + OPTIMIZATIONS - + HBASE-430 Performance: Scanners and getRow return maps with duplicate data + BUG FIXES HBASE-19 CountingBloomFilter can overflow its storage (Stu Hood and Bryan Duxbury via Stack) Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java Wed Mar 12 09:58:10 2008 @@ -362,6 +362,50 @@ } /** + * Get selected columns for the specified row at the latest timestamp + * + * @param row row key + * @param columns Array of column names you want to retrieve. + * @return Map of columns to values. Map is empty if row does not exist. + * @throws IOException + */ + public SortedMap getRow(final Text row, final Text[] columns) + throws IOException { + return getRow(row, columns, HConstants.LATEST_TIMESTAMP); + } + + /** + * Get selected columns for the specified row at a specified timestamp + * + * @param row row key + * @param columns Array of column names you want to retrieve. + * @param ts timestamp + * @return Map of columns to values. Map is empty if row does not exist. 
+ * @throws IOException + */ + public SortedMap getRow(final Text row, final Text[] columns, + final long ts) + throws IOException { + HbaseMapWritable value = null; + + value = getRegionServerWithRetries(new ServerCallable(row) { + public HbaseMapWritable call() throws IOException { + return server.getRow(location.getRegionInfo().getRegionName(), row, + columns, ts); + } + }); + + SortedMap results = new TreeMap(); + if (value != null && value.size() != 0) { + for (Map.Entry e: value.entrySet()) { + HStoreKey key = (HStoreKey) e.getKey(); + results.put(key.getColumn(), (Cell)e.getValue()); + } + } + return results; + } + + /** * Get a scanner on the current table starting at the specified row. * Return the specified columns. * Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java Wed Mar 12 09:58:10 2008 @@ -93,17 +93,6 @@ throws IOException; /** - * Get all the data for the specified row - * - * @param regionName region name - * @param row row key - * @return map of values - * @throws IOException - */ - public HbaseMapWritable getRow(final Text regionName, final Text row) - throws IOException; - - /** * Get all the data for the specified row at a given timestamp * * @param regionName region name @@ -138,6 +127,30 @@ */ public HbaseMapWritable getClosestRowBefore(final Text regionName, final Text row, final long ts) + throws IOException; + + /** + * Get selected columns for the specified row at a given timestamp. 
+ * + * @param regionName region name + * @param row row key + * @return map of values + * @throws IOException + */ + public HbaseMapWritable getRow(final Text regionName, final Text row, + final Text[] columns, final long ts) + throws IOException; + + /** + * Get selected columns for the specified row at the latest timestamp. + * + * @param regionName region name + * @param row row key + * @return map of values + * @throws IOException + */ + public HbaseMapWritable getRow(final Text regionName, final Text row, + final Text[] columns) throws IOException; /** Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Wed Mar 12 09:58:10 2008 @@ -24,6 +24,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Random; import java.util.SortedMap; import java.util.TreeMap; @@ -1068,24 +1069,6 @@ } /** - * Fetch all the columns for the indicated row. - * Returns a TreeMap that maps column names to values. - * - * We should eventually use Bloom filters here, to reduce running time. If - * the database has many column families and is very sparse, then we could be - * checking many files needlessly. A small Bloom for each row would help us - * determine which column groups are useful for that row. That would let us - * avoid a bunch of disk activity. 
- * - * @param row - * @return Map values - * @throws IOException - */ - public Map getFull(Text row) throws IOException { - return getFull(row, HConstants.LATEST_TIMESTAMP); - } - - /** * Fetch all the columns for the indicated row at a specified timestamp. * Returns a TreeMap that maps column names to values. * @@ -1096,18 +1079,21 @@ * avoid a bunch of disk activity. * * @param row + * @param columns Array of columns you'd like to retrieve. When null, get all. * @param ts - * @return Map values + * @return Map values * @throws IOException */ - public Map getFull(Text row, long ts) throws IOException { + public Map getFull(final Text row, final Set columns, + final long ts) + throws IOException { HStoreKey key = new HStoreKey(row, ts); obtainRowLock(row); try { TreeMap result = new TreeMap(); for (Text colFamily: stores.keySet()) { HStore targetStore = stores.get(colFamily); - targetStore.getFull(key, result); + targetStore.getFull(key, columns, result); } return result; } finally { @@ -1162,7 +1148,7 @@ TreeMap result = new TreeMap(); for (Text colFamily: stores.keySet()) { HStore targetStore = stores.get(colFamily); - targetStore.getFull(key, result); + targetStore.getFull(key, null, result); } return result; Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Mar 12 09:58:10 2008 @@ -24,6 +24,7 @@ import java.lang.reflect.Constructor; import java.net.InetSocketAddress; import java.net.UnknownHostException; +import java.util.Arrays; import java.util.ArrayList; import java.util.Collections; import 
java.util.HashMap; @@ -948,20 +949,34 @@ } /** {@inheritDoc} */ - public HbaseMapWritable getRow(final Text regionName, final Text row) + public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts) throws IOException { - return getRow(regionName, row, HConstants.LATEST_TIMESTAMP); + return getRow(regionName, row, null, ts); } /** {@inheritDoc} */ - public HbaseMapWritable getRow(final Text regionName, final Text row, final long ts) + public HbaseMapWritable getRow(final Text regionName, final Text row, + final Text[] columns) + throws IOException { + return getRow(regionName, row, columns, HConstants.LATEST_TIMESTAMP); + } + + /** {@inheritDoc} */ + public HbaseMapWritable getRow(final Text regionName, final Text row, + final Text[] columns, final long ts) throws IOException { checkOpen(); requestCount.incrementAndGet(); try { + // convert the columns array into a set so it's easy to check later. + Set columnSet = new HashSet(); + if (columns != null) { + columnSet.addAll(Arrays.asList(columns)); + } + HRegion region = getRegion(regionName); + Map map = region.getFull(row, columnSet, ts); HbaseMapWritable result = new HbaseMapWritable(); - Map map = region.getFull(row, ts); for (Map.Entry es: map.entrySet()) { result.put(new HStoreKey(row, es.getKey()), es.getValue()); } Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Wed Mar 12 09:58:10 2008 @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; 
import java.util.Map.Entry; @@ -184,14 +185,14 @@ * @param key * @param results */ - void getFull(HStoreKey key, SortedMap results) { + void getFull(HStoreKey key, Set columns, SortedMap results) { this.lock.readLock().lock(); try { synchronized (memcache) { - internalGetFull(memcache, key, results); + internalGetFull(memcache, key, columns, results); } synchronized (snapshot) { - internalGetFull(snapshot, key, results); + internalGetFull(snapshot, key, columns, results); } } finally { @@ -200,7 +201,7 @@ } private void internalGetFull(SortedMap map, HStoreKey key, - SortedMap results) { + Set columns, SortedMap results) { if (map.isEmpty() || key == null) { return; @@ -214,7 +215,9 @@ byte [] val = tailMap.get(itKey); if (!HLogEdit.isDeleted(val)) { - results.put(itCol, new Cell(val, itKey.getTimestamp())); + if (columns == null || columns.contains(itKey.getColumn())) { + results.put(itCol, new Cell(val, itKey.getTimestamp())); + } } } else if (key.getRow().compareTo(itKey.getRow()) < 0) { @@ -1601,24 +1604,36 @@ * Return all the available columns for the given key. The key indicates a * row and timestamp, but not a column name. * - * The returned object should map column names to byte arrays (byte[]). + * The returned object should map column names to Cells. */ - void getFull(HStoreKey key, TreeMap results) - throws IOException { + void getFull(HStoreKey key, final Set columns, TreeMap results) + throws IOException { Map> deletes = new HashMap>(); + // if the key is null, we're not even looking for anything. return. if (key == null) { return; } this.lock.readLock().lock(); - memcache.getFull(key, results); + + // get from the memcache first. 
+ memcache.getFull(key, columns, results); + try { MapFile.Reader[] maparray = getReaders(); + + // examine each mapfile for (int i = maparray.length - 1; i >= 0; i--) { MapFile.Reader map = maparray[i]; + + // synchronize on the map so that no one else iterates it at the same + // time synchronized(map) { + // seek back to the beginning map.reset(); + + // seek to the closest key that should match the row we're looking for ImmutableBytesWritable readval = new ImmutableBytesWritable(); HStoreKey readkey = (HStoreKey)map.getClosest(key, readval); if (readkey == null) { @@ -1631,7 +1646,9 @@ if(isDeleted(readkey, readval.get(), true, deletes)) { break; } - results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp())); + if (columns == null || columns.contains(readkey.getColumn())) { + results.put(new Text(readcol), new Cell(readval.get(), readkey.getTimestamp())); + } readval = new ImmutableBytesWritable(); } else if(key.getRow().compareTo(readkey.getRow()) < 0) { break; Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java (original) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseTestCase.java Wed Mar 12 09:58:10 2008 @@ -496,7 +496,7 @@ * @throws IOException */ public Map getFull(Text row) throws IOException { - return region.getFull(row); + return region.getFull(row, null, HConstants.LATEST_TIMESTAMP); } /** {@inheritDoc} */ public void flushcache() throws IOException { @@ -567,7 +567,7 @@ protected void assertCellEquals(final HRegion region, final Text row, final Text column, final long timestamp, final String value) throws IOException { - Map result = region.getFull(row, timestamp); + Map result = 
region.getFull(row, null, timestamp); Cell cell_value = result.get(column); if(value == null){ assertEquals(column.toString() + " at timestamp " + timestamp, null, cell_value); Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java (original) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestGet2.java Wed Mar 12 09:58:10 2008 @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Map; +import java.util.HashSet; import java.util.TreeMap; import org.apache.hadoop.dfs.MiniDFSCluster; @@ -33,6 +34,7 @@ import org.apache.hadoop.hbase.HStoreKey; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.io.Cell; +import org.apache.hadoop.hbase.io.BatchUpdate; /** * {@link TestGet} is a medley of tests of get all done up as a single test. @@ -224,10 +226,84 @@ } } } + + /** + * For HBASE-40 + */ + public void testGetFullWithSpecifiedColumns() throws IOException { + HRegion region = null; + HRegionIncommon region_incommon = null; + try { + HTableDescriptor htd = createTableDescriptor(getName()); + region = createNewHRegion(htd, null, null); + region_incommon = new HRegionIncommon(region); + + // write a row with a bunch of columns + Text row = new Text("some_row"); + BatchUpdate bu = new BatchUpdate(row); + bu.put(COLUMNS[0], "column 0".getBytes()); + bu.put(COLUMNS[1], "column 1".getBytes()); + bu.put(COLUMNS[2], "column 2".getBytes()); + region.batchUpdate(bu); + + assertSpecifiedColumns(region, row); + // try it again with a cache flush to involve the store, not just the + // memcache. 
+ region_incommon.flushcache(); + assertSpecifiedColumns(region, row); + + } finally { + if (region != null) { + try { + region.close(); + } catch (Exception e) { + e.printStackTrace(); + } + region.getLog().closeAndDelete(); + } + } + } + + private void assertSpecifiedColumns(final HRegion region, final Text row) + throws IOException { + HashSet all = new HashSet(); + HashSet one = new HashSet(); + HashSet none = new HashSet(); + + all.add(COLUMNS[0]); + all.add(COLUMNS[1]); + all.add(COLUMNS[2]); + one.add(COLUMNS[0]); + + // make sure we get all of them with standard getFull + Map result = region.getFull(row, null, + HConstants.LATEST_TIMESTAMP); + assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0"); + assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1"); + assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2"); + + // try to get just one + result = region.getFull(row, one, HConstants.LATEST_TIMESTAMP); + assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0"); + assertNull(result.get(COLUMNS[1])); + assertNull(result.get(COLUMNS[2])); + + // try to get all of them (specified) + result = region.getFull(row, all, HConstants.LATEST_TIMESTAMP); + assertEquals(new String(result.get(COLUMNS[0]).getValue()), "column 0"); + assertEquals(new String(result.get(COLUMNS[1]).getValue()), "column 1"); + assertEquals(new String(result.get(COLUMNS[2]).getValue()), "column 2"); + // try to get none with empty column set + result = region.getFull(row, none, HConstants.LATEST_TIMESTAMP); + assertNull(result.get(COLUMNS[0])); + assertNull(result.get(COLUMNS[1])); + assertNull(result.get(COLUMNS[2])); + } + private void assertColumnsPresent(final HRegion r, final Text row) throws IOException { - Map result = r.getFull(row); + Map result = r.getFull(row, null, HConstants.LATEST_TIMESTAMP); int columnCount = 0; for (Map.Entry e: result.entrySet()) { columnCount++; Modified: 
hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java?rev=636415&r1=636414&r2=636415&view=diff ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java (original) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHMemcache.java Wed Mar 12 09:58:10 2008 @@ -136,7 +136,7 @@ for (int i = 0; i < ROW_COUNT; i++) { HStoreKey hsk = new HStoreKey(getRowName(i)); TreeMap<Text, Cell> all = new TreeMap<Text, Cell>(); - this.hmemcache.getFull(hsk, all); + this.hmemcache.getFull(hsk, null, all); isExpectedRow(i, all); } }