hadoop-common-commits mailing list archives

From cutt...@apache.org
Subject svn commit: r525267 [1/5] - in /lucene/hadoop/trunk: ./ src/contrib/hbase/ src/contrib/hbase/src/ src/contrib/hbase/src/java/ src/contrib/hbase/src/java/org/ src/contrib/hbase/src/java/org/apache/ src/contrib/hbase/src/java/org/apache/hadoop/ src/contr...
Date Tue, 03 Apr 2007 20:34:30 GMT
Author: cutting
Date: Tue Apr  3 13:34:28 2007
New Revision: 525267

URL: http://svn.apache.org/viewvc?view=rev&rev=525267
Log:
HADOOP-1045.  Add contrib/hbase, a BigTable-like online database.

Added:
    lucene/hadoop/trunk/src/contrib/hbase/
    lucene/hadoop/trunk/src/contrib/hbase/README.txt
    lucene/hadoop/trunk/src/contrib/hbase/build.xml
    lucene/hadoop/trunk/src/contrib/hbase/src/
    lucene/hadoop/trunk/src/contrib/hbase/src/java/
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLocking.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterRegionInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMemcache.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMsg.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HScannerInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerAddress.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreFile.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LabelledData.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LeaseListener.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/Environment.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestHRegion.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=525267&r1=525266&r2=525267
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Apr  3 13:34:28 2007
@@ -88,6 +88,9 @@
 
 27. HADOOP-1081.  Fix bin/hadoop on Darwin.  (Michael Bieniosek via cutting)
 
+28. HADOOP-1045.  Add contrib/hbase, a BigTable-like online database.
+    (Jim Kellerman via cutting)
+
 
 Release 0.12.3 (not yet released)
 

Added: lucene/hadoop/trunk/src/contrib/hbase/README.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/README.txt?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/README.txt (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/README.txt Tue Apr  3 13:34:28 2007
@@ -0,0 +1,285 @@
+HBASE
+Michael Cafarella
+
+
+This document gives a quick overview of HBase, the Hadoop simple
+database. It is extremely similar to Google's BigTable, with just a
+few differences. If you understand BigTable, great. If not, you should
+still be able to understand this document.
+
+---------------------------------------------------------------
+I.
+
+HBase uses a data model very similar to that of BigTable. Users store
+data rows in labelled tables. A data row has a sortable key and an
+arbitrary number of columns. The table is stored sparsely, so that
+rows in the same table can have crazily-varying columns, if the user
+likes.
+
+A column name has the form "<group>:<label>", where <group> and <label>
+can be any string you like. A single table enforces its set of
+<group>s (called "column groups"). You can only adjust this set of
+groups by performing administrative operations on the table. However,
+you can use new <label> strings with any write, without preannouncing
+them. HBase stores the columns of a given column group physically
+close together on disk, so the items in a column group should have
+roughly the same write/read behavior.
+
+Writes are row-locked only. You cannot lock multiple rows at once. All
+row-writes are atomic by default.
+
+All updates to the database have an associated timestamp. HBase will
+store a configurable number of versions of a given cell. Clients can
+get data by asking for the "most recent value as of a certain
+time". Or, clients can fetch all available versions at once.
+
+---------------------------------------------------------------
+II.
+
+To the user, a table seems like a list of data tuples, sorted by row
+key. Physically, tables are broken into HRegions. An HRegion is
+identified by its tablename plus a start/end-key pair. A given HRegion
+with keys <start> and <end> will store all the rows from (<start>,
+<end>]. A set of HRegions, sorted appropriately, forms an entire
+table.
+
+All data is physically stored using Hadoop's DFS. Data is served to
+clients by a set of HRegionServers, usually one per machine. A given
+HRegion is served by only one HRegionServer at a time.
+
+When a client wants to make updates, it contacts the relevant
+HRegionServer and commits the update to an HRegion. Upon commit, the
+data is added to the HRegion's HMemcache and to the HRegionServer's
+HLog. The HMemcache is a memory buffer that stores and serves the
+most-recent updates. The HLog is an on-disk log file that tracks all
+updates. The commit() call will not return to the client until the
+update has been written to the HLog.
+
+When serving data, the HRegion will first check its HMemcache. If the
+data is not there, it will then check its on-disk HStores. There is an
+HStore for each column family in an HRegion. An HStore might consist
+of multiple on-disk HStoreFiles. Each HStoreFile is a B-Tree-like
+structure that allows for relatively fast access.
+
+Periodically, we invoke HRegion.flushcache() to write the contents of
+the HMemcache to an on-disk HStore's files. This adds a new HStoreFile
+to each HStore. The HMemcache is then emptied, and we write a special
+token to the HLog, indicating the HMemcache has been flushed.
+
+On startup, each HRegion checks to see if there have been any writes
+to the HLog since the most-recent invocation of flushcache(). If not,
+then all relevant HRegion data is reflected in the on-disk HStores. If
+yes, the HRegion reconstructs the updates from the HLog, writes them
+to the HMemcache, and then calls flushcache(). Finally, it deletes the
+HLog and is now available for serving data.
+
+Thus, calling flushcache() infrequently means less work, but the
+HMemcache will consume more memory and the HLog will take longer to
+reconstruct upon restart. If flushcache() is called frequently, the
+HMemcache will take less memory and the HLog will be faster to
+reconstruct, but each flushcache() call imposes some overhead.
+
+The HLog is periodically rolled, so it consists of multiple
+time-sorted files. Whenever we roll the HLog, it deletes all old log
+files that contain only flushed data. Rolling the HLog takes very
+little time and is generally a good idea to do periodically.
+
+Each call to flushcache() will add an additional HStoreFile to each
+HStore. Fetching a value from an HStore can potentially require access
+to all of its HStoreFiles. This is time-consuming, so we want to
+periodically compact these HStoreFiles into a single larger one. This
+is done by calling HStore.compact().
+
+Compaction is a very expensive operation. It's done automatically at
+startup, and should probably be done periodically during operation.
+
+The Google BigTable paper has a slightly-confusing hierarchy of major
+and minor compactions. We have just two things to keep in mind:
+
+1) A "flushcache()" drives all updates out of the memory buffer into
+   on-disk structures. Upon flushcache, the log-reconstruction time
+   goes to zero. Each flushcache() will add a new HStoreFile to each
+   HStore.
+
+2) A "compact()" consolidates all the HStoreFiles into a single
+   one. It's expensive, and is always done at startup.
+
+Unlike BigTable, Hadoop's HBase allows no period where updates have
+been "committed" but have not been written to the log. This is not
+hard to add, if it's really wanted.
+
+We can merge two HRegions into a single new HRegion by calling
+HRegion.closeAndMerge(). We can split an HRegion into two smaller
+HRegions by calling HRegion.closeAndSplit().
+
+OK, to sum up so far:
+
+1) Clients access data in tables.
+2) Tables are broken into HRegions.
+3) HRegions are served by HRegionServers. Clients contact an
+   HRegionServer to access the data within its row-range.
+4) HRegions store data in:
+  a) HMemcache, a memory buffer for recent writes
+  b) HLog, a write-log for recent writes
+  c) HStores, an efficient on-disk set of files. One per col-group.
+     (HStores use HStoreFiles.)
+
+---------------------------------------------------------------
+III.
+
+Each HRegionServer stays in contact with the single HBaseMaster. The
+HBaseMaster is responsible for telling each HRegionServer what
+HRegions it should load and make available.
+
+The HBaseMaster keeps a constant tally of which HRegionServers are
+alive at any time. If the connection between an HRegionServer and the
+HBaseMaster times out, then:
+
+ a) The HRegionServer kills itself and restarts in an empty state.
+
+ b) The HBaseMaster assumes the HRegionServer has died and reallocates
+    its HRegions to other HRegionServers.
+
+Note that this is unlike Google's BigTable, where a TabletServer can
+still serve Tablets after its connection to the Master has died. We
+tie them together, because we do not use an external lock-management
+system like BigTable. With BigTable, there's a Master that allocates
+tablets and a lock manager (Chubby) that guarantees atomic access by
+TabletServers to tablets. HBase uses just a single central point for
+all HRegionServers to access: the HBaseMaster.
+
+(This is no more dangerous than what BigTable does. Each system is
+reliant on a network structure (whether HBaseMaster or Chubby) that
+must survive for the data system to survive. There may be some
+Chubby-specific advantages, but that's outside HBase's goals right
+now.)
+
+As HRegionServers check in with a new HBaseMaster, the HBaseMaster
+asks each HRegionServer to load in zero or more HRegions. When the
+HRegionServer dies, the HBaseMaster marks those HRegions as
+unallocated, and attempts to give them to different HRegionServers.
+
+Recall that each HRegion is identified by its table name and its
+key-range. Since a table's key ranges are contiguous, and the first
+start-key and the last end-key are always NULL, it's enough to simply
+indicate the end-key.
+
+Unfortunately, this is not quite enough. Because of merge() and
+split(), we may (for just a moment) have two quite different HRegions
+with the same name. If the system dies at an inopportune moment, both
+HRegions may exist on disk simultaneously. The arbiter of which
+HRegion is "correct" is the HBase meta-information (to be discussed
+shortly). In order to distinguish between different versions of the
+same HRegion, we also add a unique 'regionId' to the HRegion name.
+
+Thus, we finally get to this identifier for an HRegion:
+
+tablename + endkey + regionId.
+
+You can see this identifier being constructed in
+HRegion.buildRegionName().
+
+We can also use this identifier as a row-label in a different
+HRegion. Thus, the HRegion meta-info is itself stored in an
+HRegion. We call this table, which maps from HRegion identifiers to
+physical HRegionServer locations, the META table.
+
+The META table itself can grow large, and may be broken into separate
+HRegions. To locate all components of the META table, we list all META
+HRegions in a ROOT table. The ROOT table is always contained in a
+single HRegion.
+
+Upon startup, the HBaseMaster immediately attempts to scan the ROOT
+table (because there is only one HRegion for the ROOT table, that
+HRegion's name is hard-coded). It may have to wait for the ROOT table
+to be allocated to an HRegionServer.
+
+Once the ROOT table is available, the HBaseMaster can scan it and
+learn of all the META HRegions. It then scans the META table. Again,
+the HBaseMaster may have to wait for all the META HRegions to be
+allocated to different HRegionServers.
+
+Finally, when the HBaseMaster has scanned the META table, it knows the
+entire set of HRegions. It can then allocate these HRegions to the set
+of HRegionServers.
+
+The HBaseMaster keeps the set of currently-available HRegionServers in
+memory. Since the death of the HBaseMaster means the death of the
+entire system, there's no reason to store this information on
+disk. All information about the HRegion->HRegionServer mapping is
+stored physically in tables (the ROOT and META tables). Thus, a client
+does not need to contact the HBaseMaster after it learns the location
+of the ROOT HRegion. The load on the HBaseMaster should be relatively
+small: it deals with timing out HRegionServers, scanning the ROOT and
+META tables upon startup, and serving the location of the ROOT HRegion.
+
+The HClient is fairly complicated, and often needs to navigate the
+ROOT and META HRegions when serving a user's request to scan a
+specific user table. If an HRegionServer is unavailable or does not
+have an HRegion it should have, the HClient will wait and retry. At
+startup, or after a recent HRegionServer failure, the correct mapping
+from HRegion to HRegionServer may not always be available.
+
+In summary:
+
+1) HRegionServers offer access to HRegions (an HRegion lives at one
+   HRegionServer)
+2) HRegionServers check in with the HBaseMaster
+3) If the HBaseMaster dies, the whole system dies
+4) The set of current HRegionServers is known only to the HBaseMaster
+5) The mapping between HRegions and HRegionServers is stored in two
+   special HRegions, which are allocated to HRegionServers like any
+   other.
+6) The ROOT HRegion is a special one, the location of which the
+   HBaseMaster always knows.
+7) It's the HClient's responsibility to navigate all this.
+
+
+---------------------------------------------------------------
+IV.
+
+What's the current status of all this code?
+
+As of this writing, there are just shy of 7,000 lines of code in the
+"hbase" directory.
+
+All of the single-machine operations (safe-committing, merging,
+splitting, versioning, flushing, compacting, log-recovery) are
+complete, have been tested, and seem to work great.
+
+The multi-machine components (the HBaseMaster, the HRegionServer, and
+the HClient) have not been fully tested. The reason is that the HClient
+is still incomplete, so the rest of the distributed code cannot be
+fully tested. I think it's good, but can't be sure until the HClient
+is done. However, the code is now very clean and in a state where
+other people can understand it and contribute.
+
+Other related features and TODOs:
+
+1) Single-machine log reconstruction works great, but distributed log
+   recovery is not yet implemented. This is relatively easy, involving
+   just a sort of the log entries and placing the shards into the
+   right DFS directories.
+
+2) Data compression is not yet implemented, but there is an obvious
+   place to do so in the HStore.
+
+3) We need easy interfaces to MapReduce jobs, so they can scan tables.
+
+4) The HMemcache lookup structure is relatively inefficient.
+
+5) File compaction is relatively slow; we should have a more
+   conservative algorithm for deciding when to apply compaction.
+
+6) For the getFull() operation, use of Bloom filters would speed
+   things up.
+
+7) We need stress-test and performance-number tools for the whole
+   system.
+
+8) There's some HRegion-specific testing code that worked fine during
+   development, but it has to be rewritten so it works against an
+   HRegion while it's hosted by an HRegionServer, and connected to an
+   HBaseMaster. This code is at the bottom of the HRegion.java file.
+

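To make the data model and the row-update protocol described in the README concrete, here is a minimal sketch of how a caller might drive the HClient API added in this commit. The table name, row key, and "contents:" column group are hypothetical; the sketch assumes the table already exists, is being served by an HRegionServer, and that hbase.master.default.name is set in the configuration.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HClient;
    import org.apache.hadoop.io.Text;

    public class HClientUpdateExample {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        HClient client = new HClient(conf);

        // Locate the table's HRegions via the ROOT and META tables (section III).
        client.openTable(new Text("webtable"));

        // Atomic, row-locked update: lock the row, stage the cells, then commit.
        long lockid = client.startUpdate(new Text("com.example/index.html"));
        client.put(lockid, new Text("contents:html"),
            "<html>...</html>".getBytes("UTF-8"));
        client.commit(lockid);

        // Read back the most recent version of the cell just written.
        byte[] html = client.get(new Text("com.example/index.html"),
            new Text("contents:html"));

        client.close();
      }
    }

If the update fails partway through, abort(lockid) releases the row lock instead of commit(lockid).
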
Added: lucene/hadoop/trunk/src/contrib/hbase/build.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/build.xml?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/build.xml (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/build.xml Tue Apr  3 13:34:28 2007
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+
+<!-- 
+Before you can run these subtargets directly, you need 
+to call at top-level: ant deploy-contrib compile-core-test
+-->
+<project name="hbase" default="jar">
+
+  <import file="../build-contrib.xml"/>
+
+  <!-- Override jar target to specify main class -->
+  <target name="jar" depends="compile">
+    <jar
+      jarfile="${build.dir}/hadoop-${name}.jar"
+      basedir="${build.classes}"      
+    />
+  </target>
+  
+  <target name="test">
+    <antcall target="hadoopbuildcontrib.test"/>
+  </target>  
+
+</project>

Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,248 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.util.TreeMap;
+import java.util.Vector;
+
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+/*******************************************************************************
+ * Abstract base class that implements the HScannerInterface.
+ * Used by the concrete HMemcacheScanner and HStoreScanners
+ ******************************************************************************/
+public abstract class HAbstractScanner implements HScannerInterface {
+
+  // Pattern to determine if a column key is a regex
+
+  private static Pattern isRegexPattern = Pattern.compile("^.*[\\\\+|^&*$\\[\\]\\}{)(]+.*$");
+  
+  // The kind of match we are doing on a column:
+
+  private static enum MATCH_TYPE {
+    FAMILY_ONLY,                        // Just check the column family name
+    REGEX,                              // Column family + matches regex
+    SIMPLE                              // Literal matching
+  };
+
+  // This class provides column matching functions that are more sophisticated
+  // than a simple string compare. There are three types of matching:
+  // 1. Match on the column family name only
+  // 2. Match on the column family + column key regex
+  // 3. Simple match: compare column family + column key literally
+  
+  private class ColumnMatcher {
+    private MATCH_TYPE matchType;
+    private String family;
+    private Pattern columnMatcher;
+    private Text col;
+  
+    ColumnMatcher(Text col) throws IOException {
+      String column = col.toString();
+      try {
+        int colpos = column.indexOf(":") + 1;
+        if(colpos == 0) {
+          throw new IllegalArgumentException("Column name has no family indicator.");
+        }
+
+        String columnkey = column.substring(colpos);
+
+        if(columnkey == null || columnkey.length() == 0) {
+          this.matchType = MATCH_TYPE.FAMILY_ONLY;
+          this.family = column.substring(0, colpos);
+
+        } else if(isRegexPattern.matcher(columnkey).matches()) {
+          this.matchType = MATCH_TYPE.REGEX;
+          this.columnMatcher = Pattern.compile(column);
+
+        } else {
+          this.matchType = MATCH_TYPE.SIMPLE;
+          this.col = col;
+        }
+      } catch(Exception e) {
+        throw new IOException("Column: " + column + ": " + e.getMessage());
+      }
+    }
+    
+    // Matching method
+    
+    boolean matches(Text col) throws IOException {
+      if(this.matchType == MATCH_TYPE.SIMPLE) {
+        return col.equals(this.col);
+        
+      } else if(this.matchType == MATCH_TYPE.FAMILY_ONLY) {
+        return col.toString().startsWith(this.family);
+        
+      } else if(this.matchType == MATCH_TYPE.REGEX) {
+        return this.columnMatcher.matcher(col.toString()).matches();
+        
+      } else {
+        throw new IOException("Invalid match type: " + this.matchType);
+      }
+    }
+  }
+  
+  protected TreeMap<Text, Vector<ColumnMatcher>> okCols;        // Holds matchers for each column family 
+  
+  protected boolean scannerClosed = false;                      // True when scanning is done
+  
+  protected HStoreKey keys[];                                   // Keys retrieved from the sources
+  protected BytesWritable vals[];                               // Values that correspond to those keys
+  
+  protected long timestamp;                                     // The timestamp to match entries against
+  
+  protected DataOutputBuffer outbuf = new DataOutputBuffer();
+  protected DataInputBuffer inbuf = new DataInputBuffer();
+
+  /** Constructor for abstract base class */
+  HAbstractScanner(long timestamp, Text[] targetCols) throws IOException {
+    
+    this.timestamp = timestamp;
+    this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>();
+    for(int i = 0; i < targetCols.length; i++) {
+      Text family = HStoreKey.extractFamily(targetCols[i]);
+      Vector<ColumnMatcher> matchers = okCols.get(family);
+      if(matchers == null) {
+        matchers = new Vector<ColumnMatcher>();
+      }
+      matchers.add(new ColumnMatcher(targetCols[i]));
+      okCols.put(family, matchers);
+    }
+  }
+
+  /**
+   * For a particular column i, find all the matchers defined for the column.
+   * Compare the column family and column key using the matchers. The first one
+   * that matches returns true. If no matchers are successful, return false.
+   * 
+   * @param i index into the keys array
+   * @return true  - if any of the matchers for the column match the column family
+   *                 and the column key.
+   *                 
+   * @throws IOException
+   */
+  boolean columnMatch(int i) throws IOException {
+    Text column = keys[i].getColumn();
+    Text family = HStoreKey.extractFamily(column);
+    Vector<ColumnMatcher> matchers = okCols.get(family);
+    if(matchers == null) {
+      return false;
+    }
+    for(int m = 0; m < matchers.size(); m++) {
+      if(matchers.get(m).matches(column)) {
+        return true;
+      }
+    }
+    return false;
+  }
+  
+  /**
+   * If the user didn't want to start scanning at the first row, this method
+   * seeks to the requested row.
+   */
+  abstract boolean findFirstRow(int i, Text firstRow) throws IOException;
+  
+  /** The concrete implementations provide a mechanism to find the next set of values */
+  abstract boolean getNext(int i) throws IOException;
+  
+  /** Mechanism used by concrete implementation to shut down a particular scanner */
+  abstract void closeSubScanner(int i) throws IOException;
+  
+  /** Mechanism used to shut down the whole scan */
+  public abstract void close() throws IOException;
+
+  /**
+   * Get the next set of values for this scanner.
+   * 
+   * @param key - The key that matched
+   * @param results - all the results for that key.
+   * @return - true if a match was found
+   * 
+   * @see org.apache.hadoop.hbase.HScannerInterface#next(org.apache.hadoop.hbase.HStoreKey, java.util.TreeMap)
+   */
+  public boolean next(HStoreKey key, TreeMap<Text, byte[]> results)
+      throws IOException {
+ 
+    // Find the next row label (and timestamp)
+ 
+    Text chosenRow = null;
+    long chosenTimestamp = -1;
+    for(int i = 0; i < keys.length; i++) {
+      while((keys[i] != null)
+          && (columnMatch(i))
+          && (keys[i].getTimestamp() <= this.timestamp)
+          && ((chosenRow == null)
+              || (keys[i].getRow().compareTo(chosenRow) < 0)
+              || ((keys[i].getRow().compareTo(chosenRow) == 0)
+                  && (keys[i].getTimestamp() > chosenTimestamp)))) {
+
+        chosenRow = new Text(keys[i].getRow());
+        chosenTimestamp = keys[i].getTimestamp();
+      }
+    }
+
+    // Grab all the values that match this row/timestamp
+
+    boolean insertedItem = false;
+    if(chosenRow != null) {
+      key.setRow(chosenRow);
+      key.setVersion(chosenTimestamp);
+      key.setColumn(new Text(""));
+
+      for(int i = 0; i < keys.length; i++) {
+        // Fetch the data
+        
+        while((keys[i] != null)
+            && (keys[i].getRow().compareTo(chosenRow) == 0)
+            && (keys[i].getTimestamp() == chosenTimestamp)) {
+
+          if(columnMatch(i)) {
+            outbuf.reset();
+            vals[i].write(outbuf);
+            byte byteresults[] = outbuf.getData();
+            inbuf.reset(byteresults, outbuf.getLength());
+            BytesWritable tmpval = new BytesWritable();
+            tmpval.readFields(inbuf);
+            results.put(new Text(keys[i].getColumn()), tmpval.get());
+            insertedItem = true;
+          }
+
+          if (! getNext(i)) {
+            closeSubScanner(i);
+          }
+        }
+
+        // Advance the current scanner beyond the chosen row, to
+        // a valid timestamp, so we're ready next time.
+        
+        while((keys[i] != null)
+            && ((keys[i].getRow().compareTo(chosenRow) <= 0)
+                || (keys[i].getTimestamp() > this.timestamp)
+                || (! columnMatch(i)))) {
+
+          getNext(i);
+        }
+      }
+    }
+    return insertedItem;
+  }
+}

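The ColumnMatcher above decides, for each requested column, whether the scanner should match a whole column family, a literal column name, or a regular expression. A brief sketch of the three forms a caller might put in a scanner's targetCols array (the "info:..." names are just illustrative):

    import org.apache.hadoop.io.Text;

    public class ColumnSpecForms {
      public static void main(String[] args) {
        Text[] targetCols = new Text[] {
          // FAMILY_ONLY: empty <label> after the ':', matches every column in the family.
          new Text("info:"),
          // SIMPLE: no regex metacharacters, compared literally against each column name.
          new Text("info:server"),
          // REGEX: the '*' trips isRegexPattern, so the whole name is compiled as a Pattern.
          new Text("info:server.*")
        };
        for (Text col : targetCols) {
          System.out.println(col);
        }
      }
    }

Such an array is what a concrete scanner receives through the HAbstractScanner constructor, for example via HClient.obtainScanner(targetCols, startRow) later in this commit.
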
Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,533 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.ipc.*;
+import org.apache.hadoop.conf.*;
+
+import java.io.*;
+import java.util.*;
+
+/*******************************************************************************
+ * HClient manages connections to the HMaster and to HRegionServers on behalf
+ * of a client of HBase.
+ ******************************************************************************/
+public class HClient extends HGlobals implements HConstants {
+  private static final Text[] metaColumns = {
+    META_COLUMN_FAMILY
+  };
+  private static final Text startRow = new Text();
+  
+  private boolean closed;
+  private Configuration conf;
+  private HServerAddress masterLocation;
+  private long clientTimeout;
+  private int numTimeouts;
+  private int numRetries;
+  private HMasterInterface master;
+  
+  private class TableInfo {
+    public HRegionInfo regionInfo;
+    public HServerAddress serverAddress;
+
+    TableInfo(HRegionInfo regionInfo, HServerAddress serverAddress) {
+      this.regionInfo = regionInfo;
+      this.serverAddress = serverAddress;
+    }
+  }
+  
+  // Map tableName -> (Map startRow -> (HRegionInfo, HServerAddress)
+  
+  private TreeMap<Text, TreeMap<Text, TableInfo>> tablesToServers;
+  
+  // For the "current" table: Map startRow -> (HRegionInfo, HServerAddress)
+  
+  private TreeMap<Text, TableInfo> tableServers;
+  
+  // Known region HServerAddress.toString() -> HRegionInterface
+  
+  private TreeMap<String, HRegionInterface> servers;
+  
+  // For row mutation operations
+
+  private Text currentRegion;
+  private HRegionInterface currentServer;
+  private Random rand;
+  private long clientid;
+
+  /** Creates a new HClient */
+  public HClient(Configuration conf) {
+    this.closed = false;
+    this.conf = conf;
+    
+    // Load config settings
+    
+    this.masterLocation = new HServerAddress(this.conf.get(MASTER_DEFAULT_NAME));
+    this.clientTimeout = this.conf.getLong("hbase.client.timeout.length", 10 * 1000);
+    this.numTimeouts = this.conf.getInt("hbase.client.timeout.number", 5);
+    this.numRetries = this.conf.getInt("hbase.client.retries.number", 2);
+    
+    // Finish initialization
+
+    this.master = null;
+    this.tablesToServers = new TreeMap<Text, TreeMap<Text, TableInfo>>();
+    this.tableServers = null;
+    this.servers = new TreeMap<String, HRegionInterface>();
+    
+    // For row mutation operations
+    
+    this.currentRegion = null;
+    this.currentServer = null;
+    this.rand = new Random();
+  }
+
+  public synchronized void openTable(Text tableName) throws IOException {
+    if(closed) {
+      throw new IllegalStateException("client is not open");
+    }
+
+    tableServers = tablesToServers.get(tableName);
+    if(tableServers == null ) {                 // We don't know where the table is
+      findTableInMeta(tableName);               // Load the information from meta
+    }
+  }
+
+  private void findTableInMeta(Text tableName) throws IOException {
+    TreeMap<Text, TableInfo> metaServers = tablesToServers.get(META_TABLE_NAME);
+    
+    if(metaServers == null) {                   // Don't know where the meta is
+      loadMetaFromRoot(tableName);
+      if(tableName.equals(META_TABLE_NAME) || tableName.equals(ROOT_TABLE_NAME)) {
+        // All we really wanted was the meta or root table
+        return;
+      }
+      metaServers = tablesToServers.get(META_TABLE_NAME);
+    }
+
+    tableServers = new TreeMap<Text, TableInfo>();
+    for(Iterator<TableInfo> i = metaServers.tailMap(tableName).values().iterator();
+        i.hasNext(); ) {
+      
+      TableInfo t = i.next();
+      
+      scanOneMetaRegion(t, tableName);
+    }
+    tablesToServers.put(tableName, tableServers);
+  }
+
+  /*
+   * Load the meta table from the root table.
+   */
+  private void loadMetaFromRoot(Text tableName) throws IOException {
+    locateRootRegion();
+    if(tableName.equals(ROOT_TABLE_NAME)) {   // All we really wanted was the root
+      return;
+    }
+    scanRoot();
+  }
+  
+  /*
+   * Repeatedly try to find the root region by asking the HMaster for where it
+   * could be.
+   */
+  private void locateRootRegion() throws IOException {
+    if(master == null) {
+      master = (HMasterInterface)RPC.getProxy(HMasterInterface.class, 
+                   HMasterInterface.versionID,
+                   masterLocation.getInetSocketAddress(), conf);
+    }
+    
+    int tries = 0;
+    HServerAddress rootRegionLocation = null;
+    do {
+      int localTimeouts = 0;
+      while(rootRegionLocation == null && localTimeouts < numTimeouts) {
+        rootRegionLocation = master.findRootRegion();
+
+        if(rootRegionLocation == null) {
+          try {
+            Thread.sleep(clientTimeout);
+
+          } catch(InterruptedException iex) {
+          }
+          localTimeouts++;
+        }
+      }
+      if(rootRegionLocation == null) {
+        throw new IOException("Timed out trying to locate root region");
+      }
+      
+      // Verify that this server still serves the root region
+      
+      HRegionInterface rootRegion = getHRegionConnection(rootRegionLocation);
+
+      if(rootRegion.getRegionInfo(rootRegionInfo.regionName) != null) {
+        tableServers = new TreeMap<Text, TableInfo>();
+        tableServers.put(startRow, new TableInfo(rootRegionInfo, rootRegionLocation));
+        tablesToServers.put(ROOT_TABLE_NAME, tableServers);
+        break;
+      }
+      rootRegionLocation = null;
+      
+    } while(rootRegionLocation == null && tries++ < numRetries);
+    
+    if(rootRegionLocation == null) {
+      closed = true;
+      throw new IOException("unable to locate root region server");
+    }
+  }
+
+  /*
+   * Scans the root region to find all the meta regions
+   */
+  private void scanRoot() throws IOException {
+    tableServers = new TreeMap<Text, TableInfo>();
+    TableInfo t = tablesToServers.get(ROOT_TABLE_NAME).get(startRow);
+    scanOneMetaRegion(t, META_TABLE_NAME);
+    tablesToServers.put(META_TABLE_NAME, tableServers);
+  }
+
+  /*
+   * Scans a single meta region
+   * @param t           - the table we're going to scan
+   * @param tableName   - the name of the table we're looking for
+   */
+  private void scanOneMetaRegion(TableInfo t, Text tableName) throws IOException {
+    HRegionInterface server = getHRegionConnection(t.serverAddress);
+    HScannerInterface scanner = null;
+    try {
+      scanner = server.openScanner(t.regionInfo.regionName, metaColumns, tableName);
+      HStoreKey key = new HStoreKey();
+      TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+      DataInputBuffer inbuf = new DataInputBuffer();
+
+      while(scanner.next(key, results)) {
+        byte hRegionInfoBytes[] = results.get(META_COL_REGIONINFO);
+        inbuf.reset(hRegionInfoBytes, hRegionInfoBytes.length);
+        HRegionInfo regionInfo = new HRegionInfo();
+        regionInfo.readFields(inbuf);
+        
+        if(! regionInfo.tableDesc.getName().equals(tableName)) {
+          // We're done
+          break;
+        }
+                    
+        byte serverBytes[] = results.get(META_COL_SERVER);
+        String serverName = new String(serverBytes, UTF8_ENCODING);
+          
+        tableServers.put(regionInfo.startKey, 
+            new TableInfo(regionInfo, new HServerAddress(serverName)));
+
+        results.clear();
+      }
+    } finally {
+      scanner.close();
+    }
+  }
+
+  public synchronized HRegionInterface getHRegionConnection(HServerAddress regionServer)
+      throws IOException {
+
+      // See if we already have a connection
+
+    HRegionInterface server = servers.get(regionServer.toString());
+    
+    if(server == null) {                                // Get a connection
+      
+      server = (HRegionInterface)RPC.waitForProxy(HRegionInterface.class, 
+          HRegionInterface.versionID, regionServer.getInetSocketAddress(), conf);
+      
+      servers.put(regionServer.toString(), server);
+    }
+    return server;
+  }
+
+  /** Close the connection to the HRegionServer */
+  public synchronized void close() throws IOException {
+    if(! closed) {
+      RPC.stopClient();
+      closed = true;
+    }
+  }
+
+  /**
+   * List all the userspace tables.  In other words, scan the META table.
+   *
+   * If we wanted this to be really fast, we could implement a special
+   * catalog table that just contains table names and their descriptors.
+   * Right now, it only exists as part of the META table's region info.
+   */
+  public HTableDescriptor[] listTables() throws IOException {
+    TreeSet<HTableDescriptor> uniqueTables = new TreeSet<HTableDescriptor>();
+    
+    TreeMap<Text, TableInfo> metaTables = tablesToServers.get(META_TABLE_NAME);
+    if(metaTables == null) {
+      // Meta is not loaded yet so go do that
+      loadMetaFromRoot(META_TABLE_NAME);
+      metaTables = tablesToServers.get(META_TABLE_NAME);
+    }
+
+    for(Iterator<TableInfo>i = metaTables.values().iterator(); i.hasNext(); ) {
+      TableInfo t = i.next();
+      HRegionInterface server = getHRegionConnection(t.serverAddress);
+      HScannerInterface scanner = null;
+      try {
+        scanner = server.openScanner(t.regionInfo.regionName, metaColumns, startRow);
+        HStoreKey key = new HStoreKey();
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        DataInputBuffer inbuf = new DataInputBuffer();
+        while(scanner.next(key, results)) {
+          byte infoBytes[] = (byte[]) results.get(ROOT_COL_REGIONINFO);
+          inbuf.reset(infoBytes, infoBytes.length);
+          HRegionInfo info = new HRegionInfo();
+          info.readFields(inbuf);
+
+          // Only examine the rows where the startKey is zero length
+          
+          if(info.startKey.getLength() == 0) {
+            uniqueTables.add(info.tableDesc);
+          }
+          results.clear();
+        }
+        
+      } finally {
+        scanner.close();
+      }
+    }
+    return (HTableDescriptor[]) uniqueTables.toArray(new HTableDescriptor[uniqueTables.size()]);
+  }
+
+  private TableInfo getTableInfo(Text row) {
+    if(tableServers == null) {
+      throw new IllegalStateException("Must open table first");
+    }
+    
+    // Only one server will have the row we are looking for
+    
+    Text serverKey = tableServers.tailMap(row).firstKey();
+    return tableServers.get(serverKey);
+  }
+  
+  /** Get a single value for the specified row and column */
+  public byte[] get(Text row, Text column) throws IOException {
+    TableInfo info = getTableInfo(row);
+    return getHRegionConnection(info.serverAddress).get(
+        info.regionInfo.regionName, row, column).get();
+  }
+ 
+  /** Get the specified number of versions of the specified row and column */
+  public byte[][] get(Text row, Text column, int numVersions) throws IOException {
+    TableInfo info = getTableInfo(row);
+    BytesWritable[] values = getHRegionConnection(info.serverAddress).get(
+        info.regionInfo.regionName, row, column, numVersions);
+    
+    ArrayList<byte[]> bytes = new ArrayList<byte[]>();
+    for(int i = 0 ; i < values.length; i++) {
+      bytes.add(values[i].get());
+    }
+    return bytes.toArray(new byte[values.length][]);
+  }
+  
+  /** 
+   * Get the specified number of versions of the specified row and column with
+   * the specified timestamp.
+   */
+  public byte[][] get(Text row, Text column, long timestamp, int numVersions) throws IOException {
+    TableInfo info = getTableInfo(row);
+    BytesWritable[] values = getHRegionConnection(info.serverAddress).get(
+        info.regionInfo.regionName, row, column, timestamp, numVersions);
+    
+    ArrayList<byte[]> bytes = new ArrayList<byte[]>();
+    for(int i = 0 ; i < values.length; i++) {
+      bytes.add(values[i].get());
+    }
+    return bytes.toArray(new byte[values.length][]);
+  }
+
+  /** Get all the data for the specified row */
+  public LabelledData[] getRow(Text row) throws IOException {
+    TableInfo info = getTableInfo(row);
+    return getHRegionConnection(info.serverAddress).getRow(
+        info.regionInfo.regionName, row);
+  }
+
+  /** 
+   * Get a scanner on the current table starting at the specified row.
+   * Return the specified columns.
+   */
+  public HScannerInterface obtainScanner(Text[] columns, Text startRow) throws IOException {
+    if(tableServers == null) {
+      throw new IllegalStateException("Must open table first");
+    }
+    return new ClientScanner(columns, startRow);
+  }
+
+  /** Start an atomic row insertion or update */
+  public long startUpdate(Text row) throws IOException {
+    TableInfo info = getTableInfo(row);
+    long lockid;
+    try {
+      currentServer = getHRegionConnection(info.serverAddress);
+      currentRegion = info.regionInfo.regionName;
+      clientid = rand.nextLong();
+      lockid = currentServer.startUpdate(currentRegion, clientid, row);
+      
+    } catch(IOException e) {
+      currentServer = null;
+      currentRegion = null;
+      throw e;
+    }
+    return lockid;
+  }
+  
+  /** Change a value for the specified column */
+  public void put(long lockid, Text column, byte val[]) throws IOException {
+    try {
+      currentServer.put(currentRegion, clientid, lockid, column, new BytesWritable(val));
+      
+    } catch(IOException e) {
+      try {
+        currentServer.abort(currentRegion, clientid, lockid);
+        
+      } catch(IOException e2) {
+      }
+      currentServer = null;
+      currentRegion = null;
+      throw e;
+    }
+  }
+  
+  /** Delete the value for a column */
+  public void delete(long lockid, Text column) throws IOException {
+    try {
+      currentServer.delete(currentRegion, clientid, lockid, column);
+      
+    } catch(IOException e) {
+      try {
+        currentServer.abort(currentRegion, clientid, lockid);
+        
+      } catch(IOException e2) {
+      }
+      currentServer = null;
+      currentRegion = null;
+      throw e;
+    }
+  }
+  
+  /** Abort a row mutation */
+  public void abort(long lockid) throws IOException {
+    try {
+      currentServer.abort(currentRegion, clientid, lockid);
+      
+    } catch(IOException e) {
+      currentServer = null;
+      currentRegion = null;
+      throw e;
+    }
+  }
+  
+  /** Finalize a row mutation */
+  public void commit(long lockid) throws IOException {
+    try {
+      currentServer.commit(currentRegion, clientid, lockid);
+      
+    } finally {
+      currentServer = null;
+      currentRegion = null;
+    }
+  }
+  
+  /**
+   * Implements the scanner interface for the HBase client.
+   * If there are multiple regions in a table, this scanner will iterate
+   * through them all.
+   */
+  private class ClientScanner implements HScannerInterface {
+    private Text[] columns;
+    private Text startRow;
+    private boolean closed;
+    private TableInfo[] regions;
+    private int currentRegion;
+    private HRegionInterface server;
+    private HScannerInterface scanner;
+    
+    public ClientScanner(Text[] columns, Text startRow) throws IOException {
+      this.columns = columns;
+      this.startRow = startRow;
+      this.closed = false;
+      Collection<TableInfo> info = tableServers.tailMap(startRow).values();
+      this.regions = info.toArray(new TableInfo[info.size()]);
+      this.currentRegion = -1;
+      this.server = null;
+      this.scanner = null;
+      nextScanner();
+    }
+    
+    /*
+     * Gets a scanner for the next region.
+     * Returns false if there are no more scanners.
+     */
+    private boolean nextScanner() throws IOException {
+      if(scanner != null) {
+        scanner.close();
+      }
+      currentRegion += 1;
+      if(currentRegion == regions.length) {
+        close();
+        return false;
+      }
+      try {
+        server = getHRegionConnection(regions[currentRegion].serverAddress);
+        scanner = server.openScanner(regions[currentRegion].regionInfo.regionName,
+            columns, startRow);
+        
+      } catch(IOException e) {
+        close();
+        throw e;
+      }
+      return true;
+    }
+    
+    /* (non-Javadoc)
+     * @see org.apache.hadoop.hbase.HScannerInterface#next(org.apache.hadoop.hbase.HStoreKey, java.util.TreeMap)
+     */
+    public boolean next(HStoreKey key, TreeMap<Text, byte[]> results) throws IOException {
+      if(closed) {
+        return false;
+      }
+      boolean status = scanner.next(key, results);
+      if(! status) {
+        status = nextScanner();
+        if(status) {
+          status = scanner.next(key, results);
+        }
+      }
+      return status;
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.hadoop.hbase.HScannerInterface#close()
+     */
+    public void close() throws IOException {
+      if(scanner != null) {
+        scanner.close();
+      }
+      server = null;
+      closed = true;
+    }
+  }
+
+}

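The ClientScanner above walks a table region by region; each next() call fills the key with the row (and timestamp) that matched and the results map with that row's columns and values. A minimal sketch of the scan path, again using a hypothetical "webtable" with a "contents:" column group:

    import java.io.IOException;
    import java.util.TreeMap;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HClient;
    import org.apache.hadoop.hbase.HScannerInterface;
    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.io.Text;

    public class HClientScanExample {
      public static void main(String[] args) throws IOException {
        HClient client = new HClient(new Configuration());
        client.openTable(new Text("webtable"));

        Text[] columns = { new Text("contents:") };        // the whole column group
        HScannerInterface scanner =
            client.obtainScanner(columns, new Text(""));   // start at the first row
        try {
          HStoreKey key = new HStoreKey();
          TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
          while (scanner.next(key, results)) {             // one row per call
            System.out.println(key.getRow() + ": " + results.size() + " cell(s)");
            results.clear();                               // the caller clears it between rows
          }
        } finally {
          scanner.close();                                 // also closes the current region scanner
        }
        client.close();
      }
    }
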
Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import org.apache.hadoop.io.Text;
+
+/*******************************************************************************
+ * HConstants holds a bunch of HBase-related constants
+ ******************************************************************************/
+public interface HConstants {
+  
+  // Configuration parameters
+  
+  static final String MASTER_DEFAULT_NAME = "hbase.master.default.name";
+  static final String HREGION_DIR = "hbase.regiondir";
+  static final String DEFAULT_HREGION_DIR = "/hbase";
+  static final String HREGIONDIR_PREFIX = "hregion_";
+
+  // Always store the location of the root table's HRegion.
+  // This HRegion is never split.
+
+  // region name = table + startkey + regionid. This is the row key.
+  // each row in the root and meta tables describes exactly 1 region
+  // Do we ever need to know all the information that we are storing?
+  
+  static final Text ROOT_TABLE_NAME = new Text("--ROOT--");
+  static final Text ROOT_COLUMN_FAMILY = new Text("info");
+  static final Text ROOT_COL_REGIONINFO = new Text(ROOT_COLUMN_FAMILY + ":" + "regioninfo");
+  static final Text ROOT_COL_SERVER = new Text(ROOT_COLUMN_FAMILY + ":" + "server");
+  static final Text ROOT_COL_STARTCODE = new Text(ROOT_COLUMN_FAMILY + ":" + "serverstartcode");
+
+  static final Text META_TABLE_NAME = new Text("--META--");
+  static final Text META_COLUMN_FAMILY = new Text(ROOT_COLUMN_FAMILY);
+  static final Text META_COL_REGIONINFO = new Text(ROOT_COL_REGIONINFO);
+  static final Text META_COL_SERVER = new Text(ROOT_COL_SERVER);
+  static final Text META_COL_STARTCODE = new Text(ROOT_COL_STARTCODE);
+
+  // Other constants
+  
+  static final long DESIRED_MAX_FILE_SIZE = 128 * 1024 * 1024;        // 128MB
+  static final String UTF8_ENCODING = "UTF-8";
+  
+}

Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+/*******************************************************************************
+ * Global values used for finding and scanning the root and meta tables.
+ ******************************************************************************/
+public class HGlobals implements HConstants {
+  
+  static HTableDescriptor rootTableDesc = null;
+  static HRegionInfo rootRegionInfo = null;
+  static HTableDescriptor metaTableDesc = null;
+
+  static {
+    rootTableDesc = new HTableDescriptor(ROOT_TABLE_NAME.toString(), 1);
+    rootTableDesc.addFamily(ROOT_COLUMN_FAMILY);
+    
+    rootRegionInfo = new HRegionInfo(0L, rootTableDesc, null, null);
+    
+    metaTableDesc = new HTableDescriptor(META_TABLE_NAME.toString(), 1);
+    metaTableDesc.addFamily(META_COLUMN_FAMILY);
+  }
+  
+
+}

Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLocking.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLocking.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLocking.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLocking.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,90 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+/*******************************************************************************
+ * HLocking is a set of lock primitives that are pretty helpful in a few places
+ * around the HBase code.  For each independent entity that needs locking, create
+ * a new HLocking instance.
+ ******************************************************************************/
+public class HLocking {
+  Integer readerLock = new Integer(0);
+  Integer writerLock = new Integer(0);
+  int numReaders = 0;
+  int numWriters = 0;
+
+  public HLocking() {
+  }
+
+  /** Caller needs the nonexclusive read-lock */
+  public void obtainReadLock() {
+    synchronized(readerLock) {
+      synchronized(writerLock) {
+        while(numWriters > 0) {
+          try {
+            writerLock.wait();
+          } catch (InterruptedException ie) {
+          }
+        }
+        numReaders++;
+        readerLock.notifyAll();
+      }
+    }
+  }
+
+  /** Caller is finished with the nonexclusive read-lock */
+  public void releaseReadLock() {
+    synchronized(readerLock) {
+      synchronized(writerLock) {
+        numReaders--;
+        readerLock.notifyAll();
+      }
+    }
+  }
+
+  /** Caller needs the exclusive write-lock */
+  public void obtainWriteLock() {
+    synchronized(readerLock) {
+      synchronized(writerLock) {
+        while(numReaders > 0) {
+          try {
+            readerLock.wait();
+          } catch (InterruptedException ie) {
+          }
+        }
+        while(numWriters > 0) {
+          try {
+            writerLock.wait();
+          } catch (InterruptedException ie) {
+          }
+        }
+        numWriters++;
+        writerLock.notifyAll();
+      }
+    }
+  }
+
+  /** Caller is finished with the write lock */
+  public void releaseWriteLock() {
+    synchronized(readerLock) {
+      synchronized(writerLock) {
+        numWriters--;
+        writerLock.notifyAll();
+      }
+    }
+  }
+}
+

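HLocking is a simple reader/writer lock: readers may overlap one another but are excluded by a writer, and a writer excludes both readers and other writers. A short sketch of the intended calling pattern, guarding a hypothetical in-memory cell map (each independently locked structure gets its own HLocking instance, per the class comment):

    import java.util.TreeMap;

    import org.apache.hadoop.hbase.HLocking;
    import org.apache.hadoop.io.Text;

    public class GuardedCellMap {
      private final HLocking lock = new HLocking();
      private final TreeMap<Text, byte[]> cells = new TreeMap<Text, byte[]>();

      /** Readers take the shared lock; they block only while a writer holds it. */
      public byte[] read(Text column) {
        lock.obtainReadLock();
        try {
          return cells.get(column);
        } finally {
          lock.releaseReadLock();
        }
      }

      /** Writers take the exclusive lock; they wait out readers and other writers. */
      public void write(Text column, byte[] value) {
        lock.obtainWriteLock();
        try {
          cells.put(column, value);
        } finally {
          lock.releaseWriteLock();
        }
      }
    }
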
Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,356 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+
+import java.io.*;
+import java.util.*;
+
+/*******************************************************************************
+ * HLog stores all the edits to the HStore.
+ * 
+ * It performs logfile-rolling, so external callers are not aware that the 
+ * underlying file is being rolled.
+ *
+ * A single HLog is used by several HRegions simultaneously.
+ * 
+ * Each HRegion is identified by a unique name.  HRegions do not need to
+ * declare themselves before using the HLog; they simply include their region
+ * name in the append() or completeCacheFlush() calls.
+ *
+ * An HLog consists of multiple on-disk files, which have a chronological order.
+ *
+ * As data is flushed to other (better) on-disk structures, the log becomes 
+ * obsolete.  We can destroy all the log messages for a given HRegion-id up to 
+ * the most-recent CACHEFLUSH message from that HRegion.
+ *
+ * It's only practical to delete entire files.  Thus, we delete an entire 
+ * on-disk file F when all of the messages in F have a log-sequence-id that's 
+ * older (smaller) than the most-recent CACHEFLUSH message for every HRegion 
+ * that has a message in F.
+ ******************************************************************************/
+public class HLog {
+  private static final Log LOG = LogFactory.getLog(HLog.class);
+  
+  static final String HLOG_DATFILE = "hlog.dat.";
+  static final Text METACOLUMN = new Text("METACOLUMN");
+  static final Text METAROW = new Text("METAROW");
+
+  FileSystem fs;
+  Path dir;
+  Configuration conf;
+
+  SequenceFile.Writer writer;
+  TreeMap<Long, Path> outputfiles = new TreeMap<Long, Path>();
+  boolean insideCacheFlush = false;
+
+  TreeMap<Text, Long> regionToLastFlush = new TreeMap<Text, Long>();
+  long oldestOutstandingSeqNum = -1;
+
+  boolean closed = false;
+  long logSeqNum = 0;
+  long filenum = 0;
+  int numEntries = 0;
+
+  Integer rollLock = new Integer(0);
+
+  /**
+   * Bundle up a bunch of log files (which are no longer being written to),
+   * into a new file.  Delete the old log files when ready.
+   */
+  public static void consolidateOldLog(Path srcDir, Path dstFile, FileSystem fs, Configuration conf) throws IOException {
+    LOG.debug("consolidating log files");
+    
+    Path logfiles[] = fs.listPaths(srcDir);
+    SequenceFile.Writer newlog = SequenceFile.createWriter(fs, conf, dstFile, HLogKey.class, HLogEdit.class);
+    try {
+      for(int i = 0; i < logfiles.length; i++) {
+        SequenceFile.Reader in = new SequenceFile.Reader(fs, logfiles[i], conf);
+        try {
+          HLogKey key = new HLogKey();
+          HLogEdit val = new HLogEdit();
+          
+          while(in.next(key, val)) {
+            newlog.append(key, val);
+          }
+          
+        } finally {
+          in.close();
+        }
+      }
+      
+    } finally {
+      newlog.close();
+    }
+    
+    if(fs.exists(srcDir)) {
+      
+      if(! fs.delete(srcDir)) {
+        LOG.error("Cannot delete: " + srcDir);
+        
+        if(! FileUtil.fullyDelete(new File(srcDir.toString()))) {
+          throw new IOException("Cannot delete: " + srcDir);
+        }
+      }
+    }
+    LOG.debug("log file consolidation completed");
+  }
+
+  /**
+   * Create an edit log at the given location.
+   *
+   * You should never have to load an existing log.  If there is a log
+   * at startup, it should have already been processed and deleted by 
+   * the time the HLog object is started up.
+   */
+  public HLog(FileSystem fs, Path dir, Configuration conf) throws IOException {
+    this.fs = fs;
+    this.dir = dir;
+    this.conf = conf;
+    this.logSeqNum = 0;
+
+    if(fs.exists(dir)) {
+      throw new IOException("Target HLog directory already exists: " + dir);
+    }
+    fs.mkdirs(dir);
+
+    rollWriter();
+  }
+
+  /**
+   * Roll the log writer.  That is, start writing log messages to
+   * a new file.
+   *
+   * The 'rollLock' prevents us from entering rollWriter() more than
+   * once at a time.
+   *
+   * The 'this' lock limits access to the current writer so
+   * we don't append multiple items simultaneously.
+   */
+  public void rollWriter() throws IOException {
+    synchronized(rollLock) {
+
+      // Try to roll the writer to a new file.  We may have to
+      // wait for a cache-flush to complete.  In the process,
+      // compute a list of old log files that can be deleted.
+
+      Vector<Path> toDeleteList = new Vector<Path>();
+      synchronized(this) {
+        if(closed) {
+          throw new IOException("Cannot roll log; log is closed");
+        }
+
+        // Make sure we do not roll the log while inside a
+        // cache-flush.  Otherwise, the log sequence number for
+        // the CACHEFLUSH operation will appear in a "newer" log file
+        // than it should.
+        
+        while(insideCacheFlush) {
+          try {
+            wait();
+          } catch (InterruptedException ie) {
+          }
+        }
+        
+        LOG.debug("closing current log writer and getting a new one");
+
+        // Close the current writer (if any), and grab a new one.
+        
+        if(writer != null) {
+          writer.close();
+          
+          if(filenum > 0) {
+            outputfiles.put(logSeqNum-1, computeFilename(filenum-1));
+          }
+        }
+        
+        Path newPath = computeFilename(filenum++);
+        this.writer = SequenceFile.createWriter(fs, conf, newPath, HLogKey.class, HLogEdit.class);
+
+        LOG.debug("new log writer created");
+        
+        // Can we delete any of the old log files?
+        // First, compute the oldest relevant log operation 
+        // over all the regions.
+
+        long oldestOutstandingSeqNum = Long.MAX_VALUE;
+        for(Iterator<Long> it = regionToLastFlush.values().iterator(); it.hasNext(); ) {
+          long curSeqNum = it.next().longValue();
+          
+          if(curSeqNum < oldestOutstandingSeqNum) {
+            oldestOutstandingSeqNum = curSeqNum;
+          }
+        }
+
+        // Next, remove all files with a final ID that's older
+        // than the oldest pending region-operation.
+
+        LOG.debug("removing old log files");
+        
+        for(Iterator<Long> it = outputfiles.keySet().iterator(); it.hasNext(); ) {
+          long maxSeqNum = it.next().longValue();
+          
+          if(maxSeqNum < oldestOutstandingSeqNum) {
+            Path p = outputfiles.get(maxSeqNum);
+            it.remove();
+            toDeleteList.add(p);
+            
+          } else {
+            break;
+          }
+        }
+      }
+
+      // Actually delete them, if any!
+
+      for(Iterator<Path> it = toDeleteList.iterator(); it.hasNext(); ) {
+        Path p = it.next();
+        fs.delete(p);
+      }
+
+      LOG.debug("old log files deleted");
+      
+      this.numEntries = 0;
+    }
+  }
+
+  /**
+   * This is a convenience method that computes a new filename with
+   * a given file-number.
+   */
+  Path computeFilename(long filenum) {
+    return new Path(dir, HLOG_DATFILE + String.format("%1$03d", filenum));
+  }
+
+  /** Shut down the log. */
+  public synchronized void close() throws IOException {
+    this.writer.close();
+    this.closed = true;
+  }
+
+  /**
+   * Append a set of edits to the log.
+   * Log edits are keyed by regionName, rowname, and log-sequence-id.
+   *
+   * Later, if we sort by these keys, we obtain all the relevant edits for
+   * a given key-range of the HRegion.  Any edits that do not have a matching
+   * COMPLETE_CACHEFLUSH message can be discarded.
+   *
+   * Logs cannot be restarted once closed, or once the HLog process dies.
+   * Each time the HLog starts, it must create a new log.  This means that
+   * other systems should process the log appropriately upon each startup
+   * (and prior to initializing HLog).
+   *
+   * We need to seize a lock on the writer so that writes are atomic.
+   */
+  public synchronized void append(Text regionName, Text tableName, Text row, TreeMap<Text, byte[]> columns, long timestamp) throws IOException {
+    if(closed) {
+      throw new IOException("Cannot append; log is closed");
+    }
+    
+    long seqNum[] = obtainSeqNum(columns.size());
+
+    // The 'regionToLastFlush' map holds the sequence id of the
+    // most recent flush for every regionName.  However, for regions
+    // that don't have any flush yet, the relevant operation is the
+    // first one that's been added.
+    
+    if(regionToLastFlush.get(regionName) == null) {
+      regionToLastFlush.put(regionName, seqNum[0]);
+    }
+
+    int counter = 0;
+    for(Iterator<Text> it = columns.keySet().iterator(); it.hasNext(); ) {
+      Text column = it.next();
+      byte[] val = columns.get(column);
+      HLogKey logKey = new HLogKey(regionName, tableName, row, seqNum[counter++]);
+      HLogEdit logEdit = new HLogEdit(column, val, timestamp);
+      writer.append(logKey, logEdit);
+
+      numEntries++;
+    }
+  }
+
+  /** How many items have been added to the log? */
+  public int getNumEntries() {
+    return numEntries;
+  }
+
+  /**
+   * Obtain a log sequence number.  This seizes the whole HLog
+   * lock, but it shouldn't last too long.
+   */
+  synchronized long obtainSeqNum() {
+    return logSeqNum++;
+  }
+  
+  synchronized long[] obtainSeqNum(int num) {
+    long[] results = new long[num];
+    for (int i = 0; i < num; i++) {
+      results[i] = logSeqNum++;
+    }
+    return results;
+  }
+
+  /**
+   * By acquiring a log sequence ID, we can allow log messages
+   * to continue while we flush the cache.
+   *
+   * Set a flag so that we do not roll the log between the start
+   * and complete of a cache-flush.  Otherwise the log-seq-id for
+   * the flush will not appear in the correct logfile.
+   */
+  public synchronized long startCacheFlush() {
+    while(insideCacheFlush) {
+      try {
+        wait();
+      } catch (InterruptedException ie) {
+      }
+    }
+    
+    insideCacheFlush = true;
+    notifyAll();
+    return obtainSeqNum();
+  }
+
+  /** Complete the cache flush */
+  public synchronized void completeCacheFlush(Text regionName, Text tableName, long logSeqId) throws IOException {
+    if(closed) {
+      return;
+    }
+    
+    if(! insideCacheFlush) {
+      throw new IOException("Impossible situation: inside completeCacheFlush(), but 'insideCacheFlush' flag is false");
+    }
+    
+    writer.append(new HLogKey(regionName, tableName, HLog.METAROW, logSeqId),
+        new HLogEdit(HLog.METACOLUMN, HStoreKey.COMPLETE_CACHEFLUSH, System.currentTimeMillis()));
+    numEntries++;
+
+    // Remember the most-recent flush for each region.
+    // This is used to delete obsolete log files.
+    
+    regionToLastFlush.put(regionName, logSeqId);
+
+    insideCacheFlush = false;
+    notifyAll();
+  }
+}
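
A rough usage sketch of the lifecycle described above: create the log, append edits, bracket a cache flush with startCacheFlush()/completeCacheFlush(), roll, and close.  The log directory, table, region, row, and column names below are assumptions chosen only for the example:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HLog;
import org.apache.hadoop.io.Text;
import java.util.TreeMap;

public class HLogExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path logDir = new Path("/tmp/hlog-example");   // must not already exist

    HLog log = new HLog(fs, logDir, conf);

    // One append may carry several column edits; each edit gets its own
    // log sequence number.
    TreeMap<Text, byte[]> columns = new TreeMap<Text, byte[]>();
    columns.put(new Text("info:name"), "value".getBytes());
    log.append(new Text("exampleRegion"), new Text("exampleTable"),
        new Text("row1"), columns, System.currentTimeMillis());

    // A cache flush is bracketed so the log is not rolled in between.
    long seqId = log.startCacheFlush();
    // ... flush the memcache to an HStore here ...
    log.completeCacheFlush(new Text("exampleRegion"), new Text("exampleTable"), seqId);

    log.rollWriter();   // start a new file; obsolete files become deletable
    log.close();
  }
}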

Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import org.apache.hadoop.io.*;
+
+import java.io.*;
+
+/*******************************************************************************
+ * A log value.
+ *
+ * These aren't sortable; you need to sort by the matching HLogKey.
+ * The table and row are already identified in HLogKey.
+ * This just indicates the column and value.
+ ******************************************************************************/
+public class HLogEdit implements Writable {
+  Text column = new Text();
+  BytesWritable val = new BytesWritable();
+  long timestamp;
+
+  public HLogEdit() {
+  }
+
+  public HLogEdit(Text column, byte[] bval, long timestamp) {
+    this.column.set(column);
+    this.val = new BytesWritable(bval);
+    this.timestamp = timestamp;
+  }
+
+  public Text getColumn() {
+    return this.column;
+  }
+
+  public BytesWritable getVal() {
+    return this.val;
+  }
+
+  public long getTimestamp() {
+    return this.timestamp;
+  }
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Writable
+  //////////////////////////////////////////////////////////////////////////////
+
+  public void write(DataOutput out) throws IOException {
+    this.column.write(out);
+    this.val.write(out);
+    out.writeLong(timestamp);
+  }
+  
+  public void readFields(DataInput in) throws IOException {
+    this.column.readFields(in);
+    this.val.readFields(in);
+    this.timestamp = in.readLong();
+  }
+}
+
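
Since HLogEdit only implements Writable, a quick round-trip through a byte stream shows how it is meant to be serialized and read back; the column name and value are made up for the example:

import org.apache.hadoop.hbase.HLogEdit;
import org.apache.hadoop.io.Text;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class HLogEditRoundTrip {
  public static void main(String[] args) throws Exception {
    HLogEdit out = new HLogEdit(new Text("info:name"),
        "value".getBytes(), System.currentTimeMillis());

    // Serialize.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    out.write(new DataOutputStream(bytes));

    // Deserialize into a fresh instance.
    HLogEdit in = new HLogEdit();
    in.readFields(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));

    System.out.println(in.getColumn() + " @ " + in.getTimestamp());
  }
}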

Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java?view=auto&rev=525267
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java Tue Apr  3 13:34:28 2007
@@ -0,0 +1,117 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import org.apache.hadoop.io.*;
+
+import java.io.*;
+
+/*******************************************************************************
+ * A Key for an entry in the change log.
+ * 
+ * The log intermingles edits to many tables and rows, so each log entry 
+ * identifies the appropriate table and row.  Within a table and row, they're 
+ * also sorted.
+ ******************************************************************************/
+public class HLogKey implements WritableComparable {
+  Text regionName = new Text();
+  Text tablename = new Text();
+  Text row = new Text();
+  long logSeqNum = 0L;
+
+  /**
+   * Create the log key!
+   * We maintain the tablename mainly for debugging purposes.
+   * A regionName is always a sub-table object.
+   */
+  public HLogKey() {
+  }
+  
+  public HLogKey(Text regionName, Text tablename, Text row, long logSeqNum) {
+    this.regionName.set(regionName);
+    this.tablename.set(tablename);
+    this.row.set(row);
+    this.logSeqNum = logSeqNum;
+  }
+
+  //////////////////////////////////////////////////////////////////////////////
+  // A bunch of accessors
+  //////////////////////////////////////////////////////////////////////////////
+
+  public Text getRegionName() {
+    return regionName;
+  }
+  
+  public Text getTablename() {
+    return tablename;
+  }
+  
+  public Text getRow() {
+    return row;
+  }
+  
+  public long getLogSeqNum() {
+    return logSeqNum;
+  }
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Comparable
+  //////////////////////////////////////////////////////////////////////////////
+
+  /**
+   * When sorting log entries, we group items first by region, then by row
+   * within that region, and finally order them by log sequence number
+   * (write-order).
+   */
+  public int compareTo(Object o) {
+    HLogKey other = (HLogKey) o;
+    int result = this.regionName.compareTo(other.regionName);
+    
+    if(result == 0) {
+      result = this.row.compareTo(other.row);
+      
+      if(result == 0) {
+        
+        if (this.logSeqNum < other.logSeqNum) {
+          result = -1;
+          
+        } else if (this.logSeqNum > other.logSeqNum) {
+          result = 1;
+        }
+      }
+    }
+    return result;
+  }
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Writable
+  //////////////////////////////////////////////////////////////////////////////
+
+  public void write(DataOutput out) throws IOException {
+    this.regionName.write(out);
+    this.tablename.write(out);
+    this.row.write(out);
+    out.writeLong(logSeqNum);
+  }
+  
+  public void readFields(DataInput in) throws IOException {
+    this.regionName.readFields(in);
+    this.tablename.readFields(in);
+    this.row.readFields(in);
+    this.logSeqNum = in.readLong();
+  }
+}
+
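
Because HLogKey is WritableComparable, a sorted collection groups entries by region, then row, then sequence number, as compareTo() above implements.  The region, table, and row names here are illustrative only:

import org.apache.hadoop.hbase.HLogKey;
import org.apache.hadoop.io.Text;
import java.util.TreeSet;

public class HLogKeyOrderingExample {
  public static void main(String[] args) {
    TreeSet<HLogKey> keys = new TreeSet<HLogKey>();
    keys.add(new HLogKey(new Text("regionB"), new Text("t"), new Text("row1"), 7L));
    keys.add(new HLogKey(new Text("regionA"), new Text("t"), new Text("row2"), 5L));
    keys.add(new HLogKey(new Text("regionA"), new Text("t"), new Text("row2"), 3L));
    keys.add(new HLogKey(new Text("regionA"), new Text("t"), new Text("row1"), 9L));

    // Iteration order: regionA/row1/9, regionA/row2/3, regionA/row2/5, regionB/row1/7
    for (HLogKey k : keys) {
      System.out.println(k.getRegionName() + "/" + k.getRow() + "/" + k.getLogSeqNum());
    }
  }
}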


