hbase-commits mailing list archives

From: jmhs...@apache.org
Subject: svn commit: r1445918 [28/29] - in /hbase/branches/hbase-7290: ./ bin/ conf/ dev-support/ hbase-client/ hbase-common/ hbase-common/src/main/java/org/apache/hadoop/hbase/ hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/ hbase-common/src/ma...
Date: Wed, 13 Feb 2013 20:58:32 GMT
Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java Wed Feb 13 20:58:23 2013
@@ -18,12 +18,19 @@ package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Random;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
+import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
 import org.apache.hadoop.util.StringUtils;
 
 /**
@@ -34,7 +41,6 @@ public abstract class MultiThreadedActio
   private static final Log LOG = LogFactory.getLog(MultiThreadedAction.class);
 
   protected final byte[] tableName;
-  protected final byte[] columnFamily;
   protected final Configuration conf;
 
   protected int numThreads = 1;
@@ -51,8 +57,69 @@ public abstract class MultiThreadedActio
   protected AtomicLong totalOpTimeMs = new AtomicLong();
   protected boolean verbose = false;
 
-  protected int minDataSize = 256;
-  protected int maxDataSize = 1024;
+  protected LoadTestDataGenerator dataGenerator = null;
+
+  /**
+   * Default implementation of LoadTestDataGenerator that uses LoadTestKVGenerator, a fixed
+   * set of column families, and a random number of columns per key within a range. The table
+   * for it can be created manually or, for example, via
+   * {@link HBaseTestingUtility#createPreSplitLoadTestTable(
+   * org.apache.hadoop.conf.Configuration, byte[], byte[], Algorithm, DataBlockEncoding)}
+   */
+  public static class DefaultDataGenerator extends LoadTestDataGenerator {
+    private byte[][] columnFamilies = null;
+    private int minColumnsPerKey;
+    private int maxColumnsPerKey;
+    private final Random random = new Random();
+
+    public DefaultDataGenerator(int minValueSize, int maxValueSize,
+        int minColumnsPerKey, int maxColumnsPerKey, byte[]... columnFamilies) {
+      super(minValueSize, maxValueSize);
+      this.columnFamilies = columnFamilies;
+      this.minColumnsPerKey = minColumnsPerKey;
+      this.maxColumnsPerKey = maxColumnsPerKey;
+    }
+
+    public DefaultDataGenerator(byte[]... columnFamilies) {
+      // Default values for tests that didn't care to provide theirs.
+      this(256, 1024, 1, 10, columnFamilies);
+    }
+
+    @Override
+    public byte[] getDeterministicUniqueKey(long keyBase) {
+      return LoadTestKVGenerator.md5PrefixedKey(keyBase).getBytes();
+    }
+
+    @Override
+    public byte[][] getColumnFamilies() {
+      return columnFamilies;
+    }
+
+    @Override
+    public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
+      int numColumns = minColumnsPerKey + random.nextInt(maxColumnsPerKey - minColumnsPerKey + 1);
+      byte[][] columns = new byte[numColumns][];
+      for (int i = 0; i < numColumns; ++i) {
+        columns[i] = Integer.toString(i).getBytes();
+      }
+      return columns;
+    }
+
+    @Override
+    public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
+      return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
+    }
+
+    @Override
+    public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
+      return LoadTestKVGenerator.verify(value, rowKey, cf, column);
+    }
+
+    @Override
+    public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
+      return (columnSet.size() >= minColumnsPerKey) && (columnSet.size() <= maxColumnsPerKey);
+    }
+  }
 
   /** "R" or "W" */
   private String actionLetter;
@@ -62,11 +129,11 @@ public abstract class MultiThreadedActio
 
   public static final int REPORTING_INTERVAL_MS = 5000;
 
-  public MultiThreadedAction(Configuration conf, byte[] tableName,
-      byte[] columnFamily, String actionLetter) {
+  public MultiThreadedAction(LoadTestDataGenerator dataGen, Configuration conf, byte[] tableName,
+      String actionLetter) {
     this.conf = conf;
+    this.dataGenerator = dataGen;
     this.tableName = tableName;
-    this.columnFamily = columnFamily;
     this.actionLetter = actionLetter;
   }
 
@@ -165,17 +232,16 @@ public abstract class MultiThreadedActio
     }
   }
 
-  public void setDataSize(int minDataSize, int maxDataSize) {
-    this.minDataSize = minDataSize;
-    this.maxDataSize = maxDataSize;
-  }
-
   public void waitForFinish() {
     while (numThreadsWorking.get() != 0) {
       Threads.sleepWithoutInterrupt(1000);
     }
   }
 
+  public boolean isDone() {
+    return (numThreadsWorking.get() == 0);
+  }
+
   protected void startThreads(Collection<? extends Thread> threads) {
     numThreadsWorking.addAndGet(threads.size());
     for (Thread thread : threads) {
@@ -202,4 +268,77 @@ public abstract class MultiThreadedActio
     sb.append(v);
   }
 
+  /**
+   * See {@link #verifyResultAgainstDataGenerator(Result, boolean, boolean)}.
+   * Does not verify cf/column integrity.
+   */
+  public boolean verifyResultAgainstDataGenerator(Result result, boolean verifyValues) {
+    return verifyResultAgainstDataGenerator(result, verifyValues, false);
+  }
+
+  /**
+   * Verifies the result from get or scan using the dataGenerator (that was presumably
+   * also used to generate said result).
+   * @param verifyValues verify that values in the result make sense for row/cf/column combination
+   * @param verifyCfAndColumnIntegrity verify that the cf/column set in the result is complete. Note
+   *                                   that to use this, multi-puts should be used, or verification
+   *                                   has to happen after writes are complete; otherwise there can be races.
+   * @return true if the result matches what the data generator would produce, false otherwise
+   */
+  public boolean verifyResultAgainstDataGenerator(Result result, boolean verifyValues,
+      boolean verifyCfAndColumnIntegrity) {
+    String rowKeyStr = Bytes.toString(result.getRow());
+
+    // See if we have any data at all.
+    if (result.isEmpty()) {
+      LOG.error("No data returned for key = [" + rowKeyStr + "]");
+      return false;
+    }
+
+    if (!verifyValues && !verifyCfAndColumnIntegrity) {
+      return true; // as long as we have something, we are good.
+    }
+
+    // See if we have all the CFs.
+    byte[][] expectedCfs = dataGenerator.getColumnFamilies();
+    if (verifyCfAndColumnIntegrity && (expectedCfs.length != result.getMap().size())) {
+      LOG.error("Bad family count for [" + rowKeyStr + "]: " + result.getMap().size());
+      return false;
+    }
+
+    // Verify each column family from get in the result.
+    for (byte[] cf : result.getMap().keySet()) {
+      String cfStr = Bytes.toString(cf);
+      Map<byte[], byte[]> columnValues = result.getFamilyMap(cf);
+      if (columnValues == null) {
+        LOG.error("No data for family [" + cfStr + "] for [" + rowKeyStr + "]");
+        return false;
+      }
+      // See if we have correct columns.
+      if (verifyCfAndColumnIntegrity
+          && !dataGenerator.verify(result.getRow(), cf, columnValues.keySet())) {
+        String colsStr = "";
+        for (byte[] col : columnValues.keySet()) {
+          if (colsStr.length() > 0) {
+            colsStr += ", ";
+          }
+          colsStr += "[" + Bytes.toString(col) + "]";
+        }
+        LOG.error("Bad columns for family [" + cfStr + "] for [" + rowKeyStr + "]: " + colsStr);
+        return false;
+      }
+      // See if values check out.
+      if (verifyValues) {
+        for (Map.Entry<byte[], byte[]> kv : columnValues.entrySet()) {
+          if (!dataGenerator.verify(result.getRow(), cf, kv.getKey(), kv.getValue())) {
+            LOG.error("Error checking data for key [" + rowKeyStr + "], column family ["
+              + cfStr + "], column [" + Bytes.toString(kv.getKey()) + "]; value of length "
+              + kv.getValue().length);
+            return false;
+          }
+        }
+      }
+    }
+    return true;
+  }
 }
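
For context, here is a minimal sketch (not part of this commit) of how the new data-generator plumbing above might be exercised, assuming only the constructors and signatures visible in the hunk; the table name "load_test_tbl" and family "test_cf" are hypothetical, and the per-cell loop mirrors what verifyResultAgainstDataGenerator does internally.

package org.apache.hadoop.hbase.util;

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

public class DataGeneratorSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // 256..1024 byte values and 1..10 columns per key -- the same defaults the
    // family-only constructor of DefaultDataGenerator falls back to.
    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
        256, 1024, 1, 10, Bytes.toBytes("test_cf"));                 // hypothetical family

    byte[] rowKey = dataGen.getDeterministicUniqueKey(42L);          // MD5-prefixed key
    HTable table = new HTable(conf, Bytes.toBytes("load_test_tbl")); // hypothetical table
    Result result = table.get(new Get(rowKey));

    // Per-cell check, the same call the new verifyResultAgainstDataGenerator makes.
    for (byte[] cf : dataGen.getColumnFamilies()) {
      Map<byte[], byte[]> cells = result.getFamilyMap(cf);
      if (cells == null) {
        continue; // missing family -- the real helper logs this as an error
      }
      for (Map.Entry<byte[], byte[]> cell : cells.entrySet()) {
        boolean ok = dataGen.verify(rowKey, cf, cell.getKey(), cell.getValue());
        System.out.println(Bytes.toString(cell.getKey()) + " verified: " + ok);
      }
    }
    table.close();
  }
}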

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java Wed Feb 13 20:58:23 2013
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.util;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
@@ -31,6 +32,7 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
 
 /** Creates multiple threads that read and verify previously written data */
@@ -72,9 +74,9 @@ public class MultiThreadedReader extends
   private int maxErrors = DEFAULT_MAX_ERRORS;
   private int keyWindow = DEFAULT_KEY_WINDOW;
 
-  public MultiThreadedReader(Configuration conf, byte[] tableName,
-      byte[] columnFamily, double verifyPercent) {
-    super(conf, tableName, columnFamily, "R");
+  public MultiThreadedReader(LoadTestDataGenerator dataGen, Configuration conf,
+      byte[] tableName, double verifyPercent) {
+    super(dataGen, conf, tableName, "R");
     this.verifyPercent = verifyPercent;
   }
 
@@ -223,14 +225,22 @@ public class MultiThreadedReader extends
     }
 
     private Get readKey(long keyToRead) {
-      Get get = new Get(
-          LoadTestKVGenerator.md5PrefixedKey(keyToRead).getBytes());
-      get.addFamily(columnFamily);
+      Get get = new Get(dataGenerator.getDeterministicUniqueKey(keyToRead));
+      String cfsString = "";
+      byte[][] columnFamilies = dataGenerator.getColumnFamilies();
+      for (byte[] cf : columnFamilies) {
+        get.addFamily(cf);
+        if (verbose) {
+          if (cfsString.length() > 0) {
+            cfsString += ", ";
+          }
+          cfsString += "[" + Bytes.toStringBinary(cf) + "]";
+        }
+      }
 
       try {
         if (verbose) {
-          LOG.info("[" + readerId + "] " + "Querying key " + keyToRead
-              + ", cf " + Bytes.toStringBinary(columnFamily));
+          LOG.info("[" + readerId + "] " + "Querying key " + keyToRead + ", cfs " + cfsString);
         }
         queryKey(get, random.nextInt(100) < verifyPercent);
       } catch (IOException e) {
@@ -250,47 +260,38 @@ public class MultiThreadedReader extends
       Result result = table.get(get);
       totalOpTimeMs.addAndGet(System.currentTimeMillis() - start);
       numKeys.addAndGet(1);
-
-      // if we got no data report error
-      if (result.isEmpty()) {
+      if (!result.isEmpty()) {
+        if (verify) {
+          numKeysVerified.incrementAndGet();
+        }
+      } else {
          HRegionLocation hloc = table.getRegionLocation(
              Bytes.toBytes(rowKey));
         LOG.info("Key = " + rowKey + ", RegionServer: "
             + hloc.getHostname());
-        numReadErrors.addAndGet(1);
-        LOG.error("No data returned, tried to get actions for key = "
-            + rowKey + (writer == null ? "" : ", keys inserted by writer: " +
-                writer.numKeys.get() + ")"));
-
-         if (numReadErrors.get() > maxErrors) {
-          LOG.error("Aborting readers -- found more than " + maxErrors
-              + " errors\n");
-           aborted = true;
-         }
       }
 
-      if (result.getFamilyMap(columnFamily) != null) {
-        // increment number of columns read
-        numCols.addAndGet(result.getFamilyMap(columnFamily).size());
-
-        if (verify) {
-          // verify the result
-          List<KeyValue> keyValues = result.list();
-          for (KeyValue kv : keyValues) {
-            String qual = new String(kv.getQualifier());
-
-            // if something does not look right report it
-            if (!LoadTestKVGenerator.verify(rowKey, qual, kv.getValue())) {
-              numReadErrors.addAndGet(1);
-              LOG.error("Error checking data for key = " + rowKey
-                  + ", actionId = " + qual);
-            }
-          }
-          numKeysVerified.addAndGet(1);
+      boolean isOk = verifyResultAgainstDataGenerator(result, verify);
+      long numErrorsAfterThis = 0;
+      if (isOk) {
+        long cols = 0;
+        // Count the columns for reporting purposes.
+        for (byte[] cf : result.getMap().keySet()) {
+          cols += result.getFamilyMap(cf).size();
+        }
+        numCols.addAndGet(cols);
+      } else {
+        if (writer != null) {
+          LOG.error("At the time of failure, writer inserted " + writer.numKeys.get() + " keys");
         }
+        numErrorsAfterThis = numReadErrors.incrementAndGet();
       }
-    }
 
+      if (numErrorsAfterThis > maxErrors) {
+        LOG.error("Aborting readers -- found more than " + maxErrors + " errors");
+        aborted = true;
+      }
+    }
   }
 
   public long getNumReadFailures() {

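As a usage illustration (not part of the commit), the refactored reader can be driven as below, assuming the start/waitForFinish methods inherited from MultiThreadedAction; the table and family names are hypothetical, and verifyPercent = 100 means every returned row is checked against the generator.

package org.apache.hadoop.hbase.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

public class ReaderSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Generator with default value sizes and column counts for one (hypothetical) family.
    LoadTestDataGenerator dataGen =
        new MultiThreadedAction.DefaultDataGenerator(Bytes.toBytes("test_cf"));
    // verifyPercent = 100: verify every row that is read back against the generator.
    MultiThreadedReader reader =
        new MultiThreadedReader(dataGen, conf, Bytes.toBytes("load_test_tbl"), 100);
    reader.start(0, 10000, 8);   // key range [0, 10000), 8 reader threads
    reader.waitForFinish();
    System.out.println("read failures: " + reader.getNumReadFailures());
  }
}
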
Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java Wed Feb 13 20:58:23 2013
@@ -17,10 +17,12 @@
 package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.PriorityQueue;
 import java.util.Queue;
-import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
@@ -31,16 +33,19 @@ import java.util.concurrent.atomic.Atomi
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
 import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 
 /** Creates multiple threads that write key/values into the table */
 public class MultiThreadedWriter extends MultiThreadedAction {
   private static final Log LOG = LogFactory.getLog(MultiThreadedWriter.class);
 
-  private long minColumnsPerKey = 1;
-  private long maxColumnsPerKey = 10;
   private Set<HBaseWriterThread> writers = new HashSet<HBaseWriterThread>();
 
   private boolean isMultiPut = false;
@@ -51,8 +56,7 @@ public class MultiThreadedWriter extends
    * {@link #insertedUpToKey}, the maximum key in the contiguous range of keys
    * being inserted. This queue is supposed to stay small.
    */
-  private BlockingQueue<Long> insertedKeys =
-      new ArrayBlockingQueue<Long>(10000);
+  private BlockingQueue<Long> insertedKeys = new ArrayBlockingQueue<Long>(10000);
 
   /**
    * This is the current key to be inserted by any thread. Each thread does an
@@ -78,9 +82,9 @@ public class MultiThreadedWriter extends
   /** Enable this if used in conjunction with a concurrent reader. */
   private boolean trackInsertedKeys;
 
-  public MultiThreadedWriter(Configuration conf, byte[] tableName,
-      byte[] columnFamily) {
-    super(conf, tableName, columnFamily, "W");
+  public MultiThreadedWriter(LoadTestDataGenerator dataGen, Configuration conf,
+    byte[] tableName) {
+    super(dataGen, conf, tableName, "W");
   }
 
   /** Use multi-puts vs. separate puts for every column in a row */
@@ -88,11 +92,6 @@ public class MultiThreadedWriter extends
     this.isMultiPut = isMultiPut;
   }
 
-  public void setColumnsPerKey(long minColumnsPerKey, long maxColumnsPerKey) {
-    this.minColumnsPerKey = minColumnsPerKey;
-    this.maxColumnsPerKey = maxColumnsPerKey;
-  }
-
   @Override
   public void start(long startKey, long endKey, int numThreads)
       throws IOException {
@@ -118,17 +117,9 @@ public class MultiThreadedWriter extends
     startThreads(writers);
   }
 
-  public static byte[] longToByteArrayKey(long rowKey) {
-    return LoadTestKVGenerator.md5PrefixedKey(rowKey).getBytes();
-  }
-
   private class HBaseWriterThread extends Thread {
     private final HTable table;
 
-    private final Random random = new Random();
-    private final LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(
-        minDataSize, maxDataSize);
-
     public HBaseWriterThread(int writerId) throws IOException {
       setName(getClass().getSimpleName() + "_" + writerId);
       table = new HTable(conf, tableName);
@@ -136,20 +127,36 @@ public class MultiThreadedWriter extends
 
     public void run() {
       try {
-        long rowKey;
-        while ((rowKey = nextKeyToInsert.getAndIncrement()) < endKey) {
-          long numColumns = minColumnsPerKey + Math.abs(random.nextLong())
-              % (maxColumnsPerKey - minColumnsPerKey);
+        long rowKeyBase;
+        byte[][] columnFamilies = dataGenerator.getColumnFamilies();
+        while ((rowKeyBase = nextKeyToInsert.getAndIncrement()) < endKey) {
+          byte[] rowKey = dataGenerator.getDeterministicUniqueKey(rowKeyBase);
+          Put put = new Put(rowKey);
           numKeys.addAndGet(1);
+          int columnCount = 0;
+          for (byte[] cf : columnFamilies) {
+            String s;
+            byte[][] columns = dataGenerator.generateColumnsForCf(rowKey, cf);
+            for (byte[] column : columns) {
+              byte[] value = dataGenerator.generateValue(rowKey, cf, column);
+              put.add(cf, column, value);
+              ++columnCount;
+              if (!isMultiPut) {
+                insert(put, rowKeyBase);
+                numCols.addAndGet(1);
+                put = new Put(rowKey);
+              }
+            }
+          }
           if (isMultiPut) {
-            multiPutInsertKey(rowKey, 0, numColumns);
-          } else {
-            for (long col = 0; col < numColumns; ++col) {
-              insert(rowKey, col);
+            if (verbose) {
+              LOG.debug("Preparing put for key = [" + Bytes.toStringBinary(rowKey) + "], " + columnCount + " columns");
             }
+            insert(put, rowKeyBase);
+            numCols.addAndGet(columnCount);
           }
           if (trackInsertedKeys) {
-            insertedKeys.add(rowKey);
+            insertedKeys.add(rowKeyBase);
           }
         }
       } finally {
@@ -162,54 +169,51 @@ public class MultiThreadedWriter extends
       }
     }
 
-    public void insert(long rowKey, long col) {
-      Put put = new Put(longToByteArrayKey(rowKey));
-      String colAsStr = String.valueOf(col);
-      put.add(columnFamily, Bytes.toBytes(colAsStr),
-          dataGenerator.generateRandomSizeValue(rowKey, colAsStr));
+    public void insert(Put put, long keyBase) {
       try {
         long start = System.currentTimeMillis();
         table.put(put);
-        numCols.addAndGet(1);
         totalOpTimeMs.addAndGet(System.currentTimeMillis() - start);
       } catch (IOException e) {
-        failedKeySet.add(rowKey);
-        LOG.error("Failed to insert: " + rowKey);
-        e.printStackTrace();
+        failedKeySet.add(keyBase);
+        String exceptionInfo;
+        if (e instanceof RetriesExhaustedWithDetailsException) {
+          RetriesExhaustedWithDetailsException aggEx = (RetriesExhaustedWithDetailsException)e;
+          exceptionInfo = aggEx.getExhaustiveDescription();
+        } else {
+          StringWriter stackWriter = new StringWriter();
+          PrintWriter pw = new PrintWriter(stackWriter);
+          e.printStackTrace(pw);
+          pw.flush();
+          exceptionInfo = StringUtils.stringifyException(e);
+        }
+        LOG.error("Failed to insert: " + keyBase + "; region information: "
+            + getRegionDebugInfoSafe(put.getRow()) + "; errors: "
+            + exceptionInfo);
       }
     }
 
-    public void multiPutInsertKey(long rowKey, long startCol, long endCol) {
-      if (verbose) {
-        LOG.debug("Preparing put for key = " + rowKey + ", cols = ["
-            + startCol + ", " + endCol + ")");
-      }
-
-      if (startCol >= endCol) {
-        return;
+    private String getRegionDebugInfoSafe(byte[] rowKey) {
+      HRegionLocation cached = null, real = null;
+      try {
+        cached = table.getRegionLocation(rowKey, false);
+        real = table.getRegionLocation(rowKey, true);
+      } catch (Throwable t) {
+        // Cannot obtain region information for another catch block - too bad!
       }
-
-      Put put = new Put(LoadTestKVGenerator.md5PrefixedKey(
-          rowKey).getBytes());
-      byte[] columnQualifier;
-      byte[] value;
-      for (long i = startCol; i < endCol; ++i) {
-        String qualStr = String.valueOf(i);
-        columnQualifier = qualStr.getBytes();
-        value = dataGenerator.generateRandomSizeValue(rowKey, qualStr);
-        put.add(columnFamily, columnQualifier, value);
+      String result = "no information can be obtained";
+      if (cached != null) {
+        result = "cached: " + cached.toString();
       }
-
-      try {
-        long start = System.currentTimeMillis();
-        table.put(put);
-        numCols.addAndGet(endCol - startCol);
-        totalOpTimeMs.addAndGet(
-            System.currentTimeMillis() - start);
-      } catch (IOException e) {
-        failedKeySet.add(rowKey);
-        e.printStackTrace();
+      if (real != null) {
+        if (real.equals(cached)) {
+          result += "; cache is up to date";
+        } else {
+          result = (cached != null) ? (result + "; ") : "";
+          result += "real: " + real.toString();
+        }
       }
+      return result;
     }
   }
 
@@ -302,8 +306,7 @@ public class MultiThreadedWriter extends
    * key, which requires a blocking queue and a consumer thread.
    * @param enable whether to enable tracking the last inserted key
    */
-  void setTrackInsertedKeys(boolean enable) {
+  public void setTrackInsertedKeys(boolean enable) {
     trackInsertedKeys = enable;
   }
-
 }
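
The rewritten HBaseWriterThread.run() loop above assembles one Put per key from the data generator. Below is a condensed sketch of that assembly (illustration only, not part of the commit; the family "test_cf" is hypothetical).

package org.apache.hadoop.hbase.util;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;

public class PutAssemblySketch {
  // Build a single multi-put for one logical key, the way the rewritten
  // run() loop does when isMultiPut is enabled.
  static Put buildPut(LoadTestDataGenerator dataGen, long keyBase) {
    byte[] rowKey = dataGen.getDeterministicUniqueKey(keyBase);
    Put put = new Put(rowKey);
    for (byte[] cf : dataGen.getColumnFamilies()) {
      for (byte[] column : dataGen.generateColumnsForCf(rowKey, cf)) {
        put.add(cf, column, dataGen.generateValue(rowKey, cf, column));
      }
    }
    return put;
  }

  public static void main(String[] args) {
    LoadTestDataGenerator dataGen =
        new MultiThreadedAction.DefaultDataGenerator(Bytes.toBytes("test_cf")); // hypothetical
    Put put = buildPut(dataGen, 42L);
    System.out.println("cells in put: " + put.size());
  }
}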

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java Wed Feb 13 20:58:23 2013
@@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.client.Re
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 
 /**
  * A command-line tool that spins up a local process-based cluster, loads
@@ -59,8 +60,8 @@ public class RestartMetaTest extends Abs
   private void loadData() throws IOException {
     long startKey = 0;
     long endKey = 100000;
-    long minColsPerKey = 5;
-    long maxColsPerKey = 15;
+    int minColsPerKey = 5;
+    int maxColsPerKey = 15;
     int minColDataSize = 256;
     int maxColDataSize = 256 * 3;
     int numThreads = 10;
@@ -74,11 +75,10 @@ public class RestartMetaTest extends Abs
     System.out.printf("Client Threads: %d\n", numThreads);
 
     // start the writers
-    MultiThreadedWriter writer = new MultiThreadedWriter(conf, TABLE_NAME,
-        LoadTestTool.COLUMN_FAMILY);
+    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(
+      minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, LoadTestTool.COLUMN_FAMILY);
+    MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
     writer.setMultiPut(true);
-    writer.setColumnsPerKey(minColsPerKey, maxColsPerKey);
-    writer.setDataSize(minColDataSize, maxColDataSize);
     writer.start(startKey, endKey, numThreads);
     System.out.printf("Started loading data...");
     writer.waitForFinish();

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Wed Feb 13 20:58:23 2013
@@ -1664,6 +1664,35 @@ public class TestHBaseFsck {
   }
 
   /**
+   * Test fixing lingering reference file.
+   */
+  @Test
+  public void testLingeringReferenceFile() throws Exception {
+    String table = "testLingeringReferenceFile";
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating a fake reference file
+      FileSystem fs = FileSystem.get(conf);
+      Path tableDir= FSUtils.getTablePath(FSUtils.getRootDir(conf), table);
+      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
+      Path famDir = new Path(regionDir, FAM_STR);
+      Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
+      fs.create(fakeReferenceFile);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
+      // fix reference file
+      doFsck(conf, true);
+      // check that reference file fixed
+      assertNoErrors(doFsck(conf, false));
+    } finally {
+      deleteTable(table);
+    }
+  }
+
+  /**
    * Test pluggable error reporter. It can be plugged in
    * from system property or configuration.
    */

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestIncrementingEnvironmentEdge.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestIncrementingEnvironmentEdge.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestIncrementingEnvironmentEdge.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestIncrementingEnvironmentEdge.java Wed Feb 13 20:58:23 2013
@@ -34,7 +34,7 @@ public class TestIncrementingEnvironment
 
   @Test
   public void testGetCurrentTimeUsesSystemClock() {
-    IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge();
+    IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(1);
     assertEquals(1, edge.currentTimeMillis());
     assertEquals(2, edge.currentTimeMillis());
     assertEquals(3, edge.currentTimeMillis());

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMergeTool.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMergeTool.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMergeTool.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMergeTool.java Wed Feb 13 20:58:23 2013
@@ -233,7 +233,7 @@ public class TestMergeTool extends HBase
       for (int j = 0; j < rows[i].length; j++) {
         Get get = new Get(rows[i][j]);
         get.addFamily(FAMILY);
-        Result result = merged.get(get, null);
+        Result result = merged.get(get);
         assertEquals(1, result.size());
         byte [] bytes = result.raw()[0].getValue();
         assertNotNull(Bytes.toStringBinary(rows[i][j]), bytes);
@@ -253,7 +253,7 @@ public class TestMergeTool extends HBase
       for (int j = 0; j < rows[i].length; j++) {
         Get get = new Get(rows[i][j]);
         get.addFamily(FAMILY);
-        Result result = regions[i].get(get, null);
+        Result result = regions[i].get(get);
         byte [] bytes = result.raw()[0].getValue();
         assertNotNull(bytes);
         assertTrue(Bytes.equals(bytes, rows[i][j]));

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java Wed Feb 13 20:58:23 2013
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.TableNotF
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -57,7 +58,7 @@ public class TestMiniClusterLoadSequenti
   protected static final byte[] CF = Bytes.toBytes("load_test_cf");
   protected static final int NUM_THREADS = 8;
   protected static final int NUM_RS = 2;
-  protected static final int TIMEOUT_MS = 120000;
+  protected static final int TIMEOUT_MS = 180000;
   protected static final HBaseTestingUtility TEST_UTIL =
       new HBaseTestingUtility();
 
@@ -139,9 +140,10 @@ public class TestMiniClusterLoadSequenti
 
     TEST_UTIL.waitUntilAllRegionsAssigned(numRegions);
 
-    writerThreads = new MultiThreadedWriter(conf, TABLE, CF);
+    LoadTestDataGenerator dataGen = new MultiThreadedAction.DefaultDataGenerator(CF);
+    writerThreads = new MultiThreadedWriter(dataGen, conf, TABLE);
     writerThreads.setMultiPut(isMultiPut);
-    readerThreads = new MultiThreadedReader(conf, TABLE, CF, 100);
+    readerThreads = new MultiThreadedReader(dataGen, conf, TABLE, 100);
   }
 
   protected int numKeys() {

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java Wed Feb 13 20:58:23 2013
@@ -38,13 +38,13 @@ public class HbckTestingUtil {
 
   public static HBaseFsck doFsck(
       Configuration conf, boolean fix, String table) throws Exception {
-    return doFsck(conf, fix, fix, fix, fix,fix, fix, fix, table);
+    return doFsck(conf, fix, fix, fix, fix,fix, fix, fix, fix, table);
   }
 
   public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
       boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
       boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
-      String table) throws Exception {
+      boolean fixReferenceFiles, String table) throws Exception {
     HBaseFsck fsck = new HBaseFsck(conf, exec);
     fsck.connect();
     fsck.setDisplayFullReport(); // i.e. -details
@@ -56,6 +56,7 @@ public class HbckTestingUtil {
     fsck.setFixHdfsOrphans(fixHdfsOrphans);
     fsck.setFixTableOrphans(fixTableOrphans);
     fsck.setFixVersionFile(fixVersionFile);
+    fsck.setFixReferenceFiles(fixReferenceFiles);
     if (table != null) {
       fsck.includeTable(table);
     }
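
To show the widened helper in use, here is a sketch (not part of the commit) of a call that turns on every repair, including the new fixReferenceFiles flag; the table name is hypothetical and the argument order follows the signature above.

package org.apache.hadoop.hbase.util.hbck;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.HBaseFsck;

public class FsckSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseFsck fsck = HbckTestingUtil.doFsck(conf,
        true,   // fixAssignments
        true,   // fixMeta
        true,   // fixHdfsHoles
        true,   // fixHdfsOverlaps
        true,   // fixHdfsOrphans
        true,   // fixTableOrphans
        true,   // fixVersionFile
        true,   // fixReferenceFiles (new in this change)
        "someTable");                              // hypothetical table
    System.out.println("hbck completed: " + (fsck != null));
  }
}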

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/OfflineMetaRebuildTestCore.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/OfflineMetaRebuildTestCore.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/OfflineMetaRebuildTestCore.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/OfflineMetaRebuildTestCore.java Wed Feb 13 20:58:23 2013
@@ -108,7 +108,7 @@ public class OfflineMetaRebuildTestCore 
   @After
   public void tearDownAfter() throws Exception {
     TEST_UTIL.shutdownMiniCluster();
-    HConnectionManager.deleteConnection(conf, true);
+    HConnectionManager.deleteConnection(conf);
   }
 
   /**

Modified: hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java (original)
+++ hbase/branches/hbase-7290/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildBase.java Wed Feb 13 20:58:23 2013
@@ -57,7 +57,7 @@ public class TestOfflineMetaRebuildBase 
     // shutdown the minicluster
     TEST_UTIL.shutdownMiniHBaseCluster();
     TEST_UTIL.shutdownMiniZKCluster();
-    HConnectionManager.deleteConnection(conf, false);
+    HConnectionManager.deleteConnection(conf);
 
     // rebuild meta table from scratch
     HBaseFsck fsck = new HBaseFsck(conf);

Modified: hbase/branches/hbase-7290/pom.xml
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/pom.xml?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/pom.xml (original)
+++ hbase/branches/hbase-7290/pom.xml Wed Feb 13 20:58:23 2013
@@ -32,7 +32,7 @@
   <parent>
     <groupId>org.apache</groupId>
     <artifactId>apache</artifactId>
-    <version>8</version>
+    <version>12</version>
     <relativePath/>
     <!-- no parent resolution -->
   </parent>
@@ -52,8 +52,6 @@
     <module>hbase-server</module>
     <module>hbase-protocol</module>
     <module>hbase-client</module>
-    <module>hbase-hadoop2-compat</module>
-    <module>hbase-hadoop1-compat</module>
     <module>hbase-hadoop-compat</module>
     <module>hbase-common</module>
     <module>hbase-it</module>
@@ -266,7 +264,7 @@
       <name>Ted Yu</name>
       <email>yuzhihong@gmail.com</email>
       <timezone>-8</timezone>
-      <organization>EBay</organization>
+      <organization>Hortonworks</organization>
       <organizationUrl>http://www.ebay.com</organizationUrl>
     </developer>
     <developer>
@@ -309,6 +307,22 @@
       <organization>Cloudera</organization>
       <organizationUrl>http://www.cloudera.com</organizationUrl>
     </developer>
+    <developer>
+      <id>mbertozzi</id>
+      <name>Matteo Bertozzi</name>
+      <email>mbertozzi@apache.org</email>
+      <timezone>0</timezone>
+      <organization>Cloudera</organization>
+      <organizationUrl>http://www.cloudera.com</organizationUrl>
+    </developer>
+    <developer>
+      <id>zjushch</id>
+      <name>Chunhui Shen</name>
+      <email>zjushch@apache.org</email>
+      <timezone>+8</timezone>
+      <organization>Taobao</organization>
+      <organizationUrl>http://www.taobao.com</organizationUrl>
+    </developer>
   </developers>
   <repositories>
     <repository>
@@ -398,7 +412,6 @@
             <maxmemory>2g</maxmemory>
             <verbose>true</verbose>
             <reportOutputDirectory>target/apidocs</reportOutputDirectory>
-            <detectLinks>true</detectLinks>
           </configuration>
         </plugin>
         <plugin>
@@ -462,7 +475,7 @@
             <!-- surefire hack, if not we're
               using method parallelization class ! -->
             <testFailureIgnore>false</testFailureIgnore>
-            <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds>
+            <forkedProcessTimeoutInSeconds>${surefire.timeout}</forkedProcessTimeoutInSeconds>
             <argLine>-enableassertions -Xmx1900m
               -Djava.security.egd=file:/dev/./urandom -Djava.net.preferIPv4Stack=true</argLine>
             <redirectTestOutputToFile>${test.output.tofile}</redirectTestOutputToFile>
@@ -507,7 +520,7 @@
         <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>findbugs-maven-plugin</artifactId>
-          <version>${findbugs.version}</version>
+          <version>${findbugs-maven-plugin.version}</version>
           <configuration>
             <excludeFilterFile>${project.basedir}/../dev-support/findbugs-exclude.xml</excludeFilterFile>
             <findbugsXmlOutput>true</findbugsXmlOutput>
@@ -702,6 +715,7 @@
           <tocMaxDepth>2</tocMaxDepth>
           <insertXrefPageNumber>yes</insertXrefPageNumber>
           <targetDirectory>${basedir}/target/docbkx</targetDirectory>
+          <chunkerOutputEncoding>UTF-8</chunkerOutputEncoding>
         </configuration>
         <executions>
           <execution>
@@ -731,6 +745,7 @@
         </executions>
       </plugin>
       <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-resources-plugin</artifactId>
         <version>${maven.resources.plugin.version}</version>
         <!--$NO-MVN-MAN-VER$ -->
@@ -848,7 +863,7 @@
     <jetty.jspapi.version>6.1.14</jetty.jspapi.version>
     <jersey.version>1.8</jersey.version>
     <jruby.version>1.6.8</jruby.version>
-    <junit.version>4.10-HBASE-1</junit.version>
+    <junit.version>4.11</junit.version>
     <htrace.version>1.49</htrace.version>
     <slf4j.version>1.4.3</slf4j.version>
     <log4j.version>1.2.17</log4j.version>
@@ -866,10 +881,11 @@
     <maven.assembly.version>2.3</maven.assembly.version>
     <maven.antrun.version>1.6</maven.antrun.version>
     <jamon.plugin.version>2.3.4</jamon.plugin.version>
-    <findbugs.version>2.4.0</findbugs.version>
+    <findbugs-maven-plugin.version>2.5.2</findbugs-maven-plugin.version>
+    <findbugs-annotations>1.3.9-1</findbugs-annotations>
     <maven.site.version>3.1</maven.site.version>
-    <javadoc.version>2.8.1</javadoc.version>
-    <maven.resources.plugin.version>2.5</maven.resources.plugin.version>
+    <javadoc.version>2.9</javadoc.version>
+    <maven.resources.plugin.version>2.6</maven.resources.plugin.version>
     <!-- General Packaging -->
     <package.prefix>/usr</package.prefix>
     <package.conf.dir>/etc/hbase</package.conf.dir>
@@ -881,10 +897,10 @@
     <!-- TODO this is pretty ugly, but works for the moment.
       Modules are pretty heavy-weight things, so doing this work isn't too bad. -->
     <server.test.jar>hbase-server-${project.version}-tests.jar</server.test.jar>
+    <common.test.jar>hbase-common-${project.version}-tests.jar</common.test.jar>
     <it.test.jar>hbase-it-${project.version}-tests.jar</it.test.jar>
     <surefire.version>2.12-TRUNK-HBASE-2</surefire.version>
     <surefire.provider>surefire-junit47</surefire.provider>
-    <compat.module>hbase-hadoop1-compat</compat.module>
     <!-- default: run small & medium, medium with 2 threads -->
     <surefire.skipFirstPart>false</surefire.skipFirstPart>
     <surefire.skipSecondPart>false</surefire.skipSecondPart>
@@ -895,6 +911,7 @@
     <surefire.firstPartGroups>org.apache.hadoop.hbase.SmallTests</surefire.firstPartGroups>
     <surefire.secondPartGroups>org.apache.hadoop.hbase.MediumTests</surefire.secondPartGroups>
     <test.output.tofile>true</test.output.tofile>
+    <surefire.timeout>900</surefire.timeout>
   </properties>
   <!-- Sorted by groups of dependencies then groupId and artifactId -->
   <dependencyManagement>
@@ -1225,9 +1242,7 @@
         <groupId>junit</groupId>
         <artifactId>junit</artifactId>
         <version>${junit.version}</version>
-        <scope>test</scope>
-        <!-- FIXME: the following needs to go away once HBASE-4955 is fixed -->
-        <optional>true</optional>
+        <scope>runtime</scope>
       </dependency>
       <dependency>
         <groupId>org.cloudera.htrace</groupId>
@@ -1244,6 +1259,14 @@
   </dependencyManagement>
   <!-- Dependencies needed by subprojects -->
   <dependencies>
+
+    <dependency>
+      <groupId>com.github.stephenc.findbugs</groupId>
+      <artifactId>findbugs-annotations</artifactId>
+      <version>${findbugs-annotations}</version>
+      <scope>compile</scope>
+    </dependency>
+
     <!-- Test dependencies -->
     <dependency>
       <groupId>junit</groupId>
@@ -1303,6 +1326,37 @@
         <build.platform>Mac_OS_X-${sun.arch.data.model}</build.platform>
       </properties>
     </profile>
+    <profile>
+      <id>os.windows</id>
+      <activation>
+        <os>
+          <family>Windows</family>
+        </os>
+      </activation>
+      <properties>
+        <build.platform>cygwin</build.platform>
+      </properties>
+      <build>
+        <pluginManagement>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-surefire-plugin</artifactId>
+            <configuration>
+              <argLine>-enableassertions -Xmx1900m -Djava.security.egd=file:/dev/./urandom -Djava.net.preferIPv4Stack=true</argLine>
+              <systemProperties>
+                <property>
+                  <name>java.net.preferIPv4Stack</name>
+                  <value>true</value>
+		</property>
+              </systemProperties>
+            </configuration>
+          </plugin>
+        </plugins>
+        </pluginManagement>
+      </build>
+    </profile>
+
     <!-- this profile should be activated for release builds -->
     <profile>
       <id>release</id>
@@ -1336,6 +1390,9 @@
           <name>!hadoop.profile</name>
         </property>
       </activation>
+      <modules>
+        <module>hbase-hadoop1-compat</module>
+      </modules>
       <properties>
         <hadoop.version>${hadoop-one.version}</hadoop.version>
         <slf4j.version>1.4.3</slf4j.version>
@@ -1392,6 +1449,9 @@
           <value>2.0</value>
         </property>
       </activation>
+      <modules>
+        <module>hbase-hadoop2-compat</module>
+      </modules>
       <properties>
         <slf4j.version>1.6.1</slf4j.version>
         <hadoop.version>${hadoop-two.version}</hadoop.version>
@@ -1678,9 +1738,13 @@
     </plugins>
   </reporting>
   <distributionManagement>
-    <site>
-      <id>apache.website</id>
-      <url>scp://people.apache.org/www/hbase.apache.org/</url>
-    </site>
+      <site>
+          <id>hbase.apache.org</id>
+          <name>HBase Website at hbase.apache.org</name>
+          <!-- On why this is the tmp dir and not hbase.apache.org, see
+               https://issues.apache.org/jira/browse/HBASE-7593?focusedCommentId=13555866&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-13555866
+               -->
+          <url>file:///tmp</url>
+      </site>
   </distributionManagement>
 </project>
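The surefire.firstPartGroups/secondPartGroups properties above select tests by JUnit category. A minimal, hypothetical test class showing how a class opts into the small-test group (class and test names are made up):

package org.apache.hadoop.hbase;

import static org.junit.Assert.assertEquals;

import org.junit.Test;
import org.junit.experimental.categories.Category;

// The @Category value is what surefire.firstPartGroups matches on.
@Category(SmallTests.class)
public class TestSomethingSmall {
  @Test
  public void testAddition() {
    assertEquals(4, 2 + 2);
  }
}
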

Modified: hbase/branches/hbase-7290/src/assembly/components.xml
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/src/assembly/components.xml?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/src/assembly/components.xml (original)
+++ hbase/branches/hbase-7290/src/assembly/components.xml Wed Feb 13 20:58:23 2013
@@ -112,5 +112,13 @@
       </includes>
       <fileMode>0644</fileMode>
     </fileSet>
+    <fileSet>
+      <directory>hbase-common/target/</directory>
+      <outputDirectory>lib</outputDirectory>
+      <includes>
+        <include>${common.test.jar}</include>
+      </includes>
+      <fileMode>0644</fileMode>
+    </fileSet>
   </fileSets>
 </component>
\ No newline at end of file

Modified: hbase/branches/hbase-7290/src/docbkx/book.xml
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/src/docbkx/book.xml?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/src/docbkx/book.xml (original)
+++ hbase/branches/hbase-7290/src/docbkx/book.xml Wed Feb 13 20:58:23 2013
@@ -2343,7 +2343,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_PO
           </itemizedlist>
           </para>
           <para>The following StoreFiles exist: 100, 25, 12, and 12 bytes apiece (oldest to newest).
-          With the above parameters, the files that would be selected for minor compaction are 23, 12, and 12.
+          With the above parameters, no compaction will be started.
           </para>
           <para>Why?
           <itemizedlist>

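To see why 100, 25, 12 and 12 byte files yield no compaction under the parameters discussed in that section (not all of which are visible in this hunk), here is a simplified sketch of ratio-based selection; it assumes a compaction ratio of 1.0 and a minimum of 3 files, ignores the min/max file-size bounds, and is an illustration rather than the exact HBase algorithm.

import java.util.ArrayList;
import java.util.List;

public class CompactionSelectionSketch {
  // Simplified ratio-based minor-compaction selection: a file qualifies when its
  // size is strictly smaller than (sum of all newer files) * ratio.  Real HBase
  // also applies min/max file-size bounds and contiguity; illustration only.
  static List<Long> select(long[] sizesOldestFirst, double ratio, int minFiles) {
    List<Long> selected = new ArrayList<Long>();
    for (int i = 0; i < sizesOldestFirst.length; i++) {
      long sumNewer = 0;
      for (int j = i + 1; j < sizesOldestFirst.length; j++) {
        sumNewer += sizesOldestFirst[j];
      }
      if (sizesOldestFirst[i] < sumNewer * ratio) {
        selected.add(sizesOldestFirst[i]);
      }
    }
    return selected.size() >= minFiles ? selected : new ArrayList<Long>();
  }

  public static void main(String[] args) {
    // 100 is not < 49, 25 is not < 24, 12 is not < 12 -> nothing qualifies,
    // so no minor compaction is started, matching the corrected text above.
    System.out.println(select(new long[] {100, 25, 12, 12}, 1.0, 3));
  }
}
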
Modified: hbase/branches/hbase-7290/src/docbkx/configuration.xml
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/src/docbkx/configuration.xml?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/src/docbkx/configuration.xml (original)
+++ hbase/branches/hbase-7290/src/docbkx/configuration.xml Wed Feb 13 20:58:23 2013
@@ -402,7 +402,7 @@ to ensure well-formedness of your docume
 
         <para>Distributed modes require an instance of the <emphasis>Hadoop
         Distributed File System</emphasis> (HDFS). See the Hadoop <link
-        xlink:href="http://hadoop.apache.org/common/docs/current/api/overview-summary.html#overview_description">
+        xlink:href="http://hadoop.apache.org/common/docs/r1.1.1/api/overview-summary.html#overview_description">
         requirements and instructions</link> for how to set up a HDFS. Before
         proceeding, ensure you have an appropriate, working HDFS.</para>
 
@@ -1051,6 +1051,54 @@ index e70ebc6..96f8c27 100644
        RegionSize can also be set on a per-table basis via
        <link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HTableDescriptor.html">HTableDescriptor</link>.
       </para>
+      <section xml:id="too_many_regions">
+          <title>How many regions per RegionServer?</title>
+          <para>
+              Typically you want to keep your region count low on HBase for numerous reasons.
+              Usually right around 100 regions per RegionServer has yielded the best results.
+              Here are some of the reasons for keeping the region count low:
+              <itemizedlist>
+                  <listitem><para>
+                          MSLAB requires 2MB per memstore (that's 2MB per family per region).
+                          1000 regions that have 2 families each use 3.9GB of heap, and that is before any data is stored. NB: the 2MB value is configurable.
+                  </para></listitem>
+                  <listitem><para>If you fill all the regions at somewhat the same rate, the global memory usage forces tiny
+                          flushes when you have too many regions, which in turn generates compactions.
+                          Rewriting the same data tens of times is the last thing you want.
+                          As an example, consider filling 1000 regions (with one family) equally, with a lower bound for global memstore
+                          usage of 5GB (the region server would have a big heap).
+                          Once usage reaches 5GB it will force flush the biggest region;
+                          at that point almost all regions should have about 5MB of data, so
+                          it would flush that amount. 5MB inserted later, it would flush another
+                          region that will now have a bit over 5MB of data, and so on.
+                          A basic formula for the number of regions per region server
+                          looks like this:
+                          heap size * upper global memstore limit = heap devoted to memstores;
+                          then heap devoted to memstores / (number of regions per RS * CFs)
+                          gives the rough memstore size per store if everything is being written to.
+                          A more accurate formula is
+                          heap size * upper global memstore limit = heap devoted to memstores, then
+                          heap devoted to memstores / (number of actively written regions per RS * CFs).
+                          This can allow a higher region count from the write perspective if you know how many
+                          regions you will be writing to at one time.
+                  </para></listitem>
+                  <listitem><para>The master, as is, is allergic to tons of regions, and will
+                          take a lot of time assigning them and moving them around in batches.
+                          The reason is that it is heavy on ZK usage, and it is not very async
+                          at the moment (this could really be improved -- and has been improved a bunch
+                          in 0.96 HBase).
+                  </para></listitem>
+                  <listitem><para>
+                          In older versions of HBase (pre-v2 hfile, 0.90 and previous), tons of regions
+                          on a few RSs can cause the store file index to rise, raising heap usage and potentially
+                          creating memory pressure or OOMEs on the RSs.
+                  </para></listitem>
+          </itemizedlist>
+      </para>
+      <para>Another issue is the effect of the number of regions on MapReduce jobs.
+          Keeping 5 regions per RS would be too low for a job, whereas 1000 regions will generate far too many map tasks.
+      </para>
+      </section>
 
       </section>
       <section xml:id="disable.splitting">

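A small worked sketch (not part of the commit) that makes the memstore formula in the new section concrete; the 10 GB heap, the 0.4 upper global memstore limit, the region count, and the single column family are assumed example values.

public class RegionCountSketch {
  public static void main(String[] args) {
    // Assumed example values -- not from the commit.
    double heapBytes = 10L * 1024 * 1024 * 1024;   // 10 GB RegionServer heap
    double upperGlobalMemstoreLimit = 0.4;         // assumed memstore upper limit
    int regionsPerRs = 100;
    int columnFamilies = 1;

    // heap * upper global memstore limit = heap devoted to memstores ...
    double memstoreHeap = heapBytes * upperGlobalMemstoreLimit;
    // ... divided by (regions per RS * CFs) = rough memstore size per store
    // if every region is written to at the same rate.
    double perMemstore = memstoreHeap / (regionsPerRs * columnFamilies);

    System.out.printf("~%.0f MB available per memstore%n", perMemstore / (1024 * 1024));
    // ~41 MB here; with 1000 regions it drops to ~4 MB, forcing the tiny
    // flushes and rewrites the section warns about.
  }
}
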
Modified: hbase/branches/hbase-7290/src/docbkx/customization.xsl
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/src/docbkx/customization.xsl?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/src/docbkx/customization.xsl (original)
+++ hbase/branches/hbase-7290/src/docbkx/customization.xsl Wed Feb 13 20:58:23 2013
@@ -22,6 +22,7 @@
  */
 -->
   <xsl:import href="urn:docbkx:stylesheet"/>
+  <xsl:output method="html" encoding="UTF-8" indent="no"/>
 
   <xsl:template name="user.header.content">
   </xsl:template>

Modified: hbase/branches/hbase-7290/src/docbkx/developer.xml
URL: http://svn.apache.org/viewvc/hbase/branches/hbase-7290/src/docbkx/developer.xml?rev=1445918&r1=1445917&r2=1445918&view=diff
==============================================================================
--- hbase/branches/hbase-7290/src/docbkx/developer.xml (original)
+++ hbase/branches/hbase-7290/src/docbkx/developer.xml Wed Feb 13 20:58:23 2013
@@ -163,7 +163,7 @@ mvn clean package -DskipTests
       <section xml:id="build.tgz">
         <title>Building the HBase tarball</title>
         <para>Do the following to build the HBase tarball.
-        Passing the -Drelease will generate javadoc and run the RAT plugin to verify licenses on source.
+        Passing the -Prelease will generate javadoc and run the RAT plugin to verify licenses on source.
         <programlisting>% MAVEN_OPTS="-Xmx2g" mvn clean site install assembly:assembly -DskipTests -Prelease</programlisting>
 </para>
       </section>
@@ -308,6 +308,19 @@ What is the new development version for 
 [INFO] -----------------------------------------------------------------------</programlisting>
         </para>
       </section>
+      <section xml:id="documentation">
+          <title>Generating the HBase Reference Guide</title>
+          <para>The manual is marked up using <link xlink:href="http://www.docbook.org/">docbook</link>.
+              We then use the <link xlink:href="http://code.google.com/p/docbkx-tools/">docbkx maven plugin</link>
+              to transform the markup to html.  This plugin is run when you specify the <command>site</command>
+              goal as in when you run <command>mvn site</command> or you can call the plugin explicitly to
+              just generate the manual by doing <command>mvn docbkx:generate-html</command>
+              (TODO: It looks like you have to run <command>mvn site</command> first because docbkx wants to
+              include a transformed <filename>hbase-default.xml</filename>.  Fix).
+              When you run mvn site, we do the document generation twice, once to generate the multipage
+              manual and then again for the single page manual (the single page version is easier to search).
+          </para>
+      </section>
     <section xml:id="hbase.org">
     <title>Updating hbase.apache.org</title>
       <section xml:id="hbase.org.site.contributing">
@@ -320,12 +333,23 @@ What is the new development version for 
       </section>
       <section xml:id="hbase.org.site.publishing">
       <title>Publishing hbase.apache.org</title>
-      <para>If you're a committer with rights to publish the site artifacts: set up your apache credentials and the target site location locally in a place and
-      form that maven can pick it up, in <filename>~/.m2/settings.xml</filename>.  See <xref linked="mvn_repo" /> for an example.
-      Next, run the following:
-      <programlisting>$ mvn -DskipTests -Papache-release site site:deploy</programlisting>
-      You will be asked for your password.  It can take a little time.
-      Remember that it can take a few hours for your site changes to show up.
+      <para>As of <link xlink:href="https://issues.apache.org/jira/browse/INFRA-5680">INFRA-5680 Migrate apache hbase website</link>,
+          to publish the website, build it, and then deploy it over a checkout of <filename>https://svn.apache.org/repos/asf/hbase/hbase.apache.org/trunk</filename>,
+          and then check it in.  For example, if trunk is checked out at <filename>/Users/stack/checkouts/trunk</filename>
+          and hbase.apache.org is checked out at <filename>/Users/stack/checkouts/hbase.apache.org/trunk</filename>, to update
+          the site, do the following:
+          <programlisting>
+              # Build the site and deploy it to the checked out directory
+              # Getting the javadoc into site is a little tricky.  You have to build it independent, then
+              # 'aggregate' it at top-level so the pre-site site lifecycle step can find it; that is
+              # what the javadoc:javadoc and javadoc:aggregate is about.
+              $ MAVEN_OPTS=" -Xmx3g" mvn clean -DskipTests javadoc:javadoc javadoc:aggregate site  site:stage -DstagingDirectory=/Users/stack/checkouts/hbase.apache.org/trunk
+              # Check the deployed site by viewing in a browser.
+              # If all is good, commit it and it will show up at http://hbase.apache.org
+              #
+              $ cd /Users/stack/checkouts/hbase.apache.org/trunk
+              $ svn commit -m 'Committing latest version of website...'
+          </programlisting>
       </para>
       </section>
     </section>
@@ -477,11 +501,13 @@ error will be reported when a non-existe
 
 <section xml:id="hbase.unittests.cmds.test.profiles">
 <title>Other test invocation permutations</title>
-<para>Running <programlisting>mvn test -P runSmallTests</programlisting> will execute small tests only, in a single JVM.
+<para>Running <programlisting>mvn test -P runSmallTests</programlisting> will execute "small" tests only, using a single JVM.
 </para>
-<para>Running <programlisting>mvn test -P runMediumTests</programlisting> will execute medium tests in a single JVM.
+<para>Running <programlisting>mvn test -P runMediumTests</programlisting> will execute "medium" tests only, launching a new JVM for each test-class.
 </para>
-<para>Running <programlisting>mvn test -P runLargeTests</programlisting> execute medium tests in a single JVM.
+<para>Running <programlisting>mvn test -P runLargeTests</programlisting> will execute "large" tests only, launching a new JVM for each test-class.
+</para>
+<para>For convenience, you can run <programlisting>mvn test -P runDevTests</programlisting> to execute both small and medium tests, using a single JVM.
 </para>
 </section>
 
@@ -1046,6 +1072,12 @@ Bar bar = foo.getBar();     &lt;--- imag
          </section>
           <section xml:id="common.patch.feedback.writable">
             <title>Implementing Writable</title>
+            <note>
+                <title>Applies pre-0.96 only</title>
+                <para>In 0.96, HBase moved to protobufs.  The below section on Writables
+                    applies to 0.94.x and previous, not to 0.96 and beyond.
+                </para>
+            </note>
             <para>Every class returned by RegionServers must implement <code>Writable</code>.  If you
             are creating a new class that needs to implement this interface, don't forget the default constructor.
             </para>
@@ -1053,8 +1085,29 @@ Bar bar = foo.getBar();     &lt;--- imag
           <section xml:id="common.patch.feedback.javadoc">
             <title>Javadoc</title>
             <para>This is also a very common feedback item.  Don't forget Javadoc!
+                <para>Javadoc warnings are checked during precommit. If the precommit tool gives you a '-1',
+                    please fix the javadoc issue. Your patch won't be committed if it adds such warnings.
+                </para>
             </para>
           </section>
+          <section xml:id="common.patch.feedback.findbugs">
+            <title>Findbugs</title>
+                <para>
+                    Findbugs is used to detect common bug patterns. Like Javadoc, it is checked during
+                    the precommit build on Apache's Jenkins, and as with Javadoc, please fix any issues it reports.
+                    You can run Findbugs locally with 'mvn findbugs:findbugs': it will generate the
+                    Findbugs files locally.  Sometimes, you may have to write code smarter than
+                    Findbugs. You can tell Findbugs you know what you're doing
+                    by annotating your class with:
+                    <programlisting>@edu.umd.cs.findbugs.annotations.SuppressWarnings(
+                    value="HE_EQUALS_USE_HASHCODE",
+                    justification="I know what I'm doing")</programlisting>
+            </para>
+            <para>
+                    Note that we're using the Apache-licensed version of the annotations.
+            </para>
+          </section>
+
           <section xml:id="common.patch.feedback.javadoc.defaults">
             <title>Javadoc - Useless Defaults</title>
             <para>Don't just leave the @param arguments the way your IDE generated them.  Don't do this...
@@ -1101,6 +1154,18 @@ Bar bar = foo.getBar();     &lt;--- imag
           </para>
          <para>Committers will also resolve the Jira, typically after the patch passes a build.
           </para>
+          <section xml:id="committer.tests">
+              <title>Committers are responsible for making sure commits do not break the build or tests</title>
+          <para>
+              If a committer commits a patch it is their responsibility
+              to make sure it passes the test suite.  It is helpful
+              if contributors keep an eye out that their patch
+              does not break the hbase build and/or tests but ultimately,
+              a contributor cannot be expected to be up on the
+              particular vagaries and interconnections that occur
+              in a project like hbase.  A committer should.
+            </para>
+        </section>
         </section>
 
        </section>   <!--  submitting patches -->
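
To illustrate the Findbugs suppression described above, here is a hypothetical class (name and code made up, not part of the commit) that deliberately triggers and then suppresses the HE_EQUALS_USE_HASHCODE warning; the annotation comes from the Apache-licensed findbugs-annotations artifact added to the pom in this commit.

package org.example;

@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value = "HE_EQUALS_USE_HASHCODE",
    justification = "I know what I'm doing")
public class CachedKey {
  private final byte[] key;

  public CachedKey(byte[] key) {
    this.key = key;
  }

  @Override
  public boolean equals(Object other) {
    // equals() without hashCode() is what Findbugs would normally flag.
    return other instanceof CachedKey
        && java.util.Arrays.equals(key, ((CachedKey) other).key);
  }
}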


