Return-Path: Delivered-To: apmail-hadoop-hbase-commits-archive@minotaur.apache.org Received: (qmail 13621 invoked from network); 25 Feb 2009 06:00:03 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 25 Feb 2009 06:00:03 -0000 Received: (qmail 89995 invoked by uid 500); 25 Feb 2009 06:00:02 -0000 Delivered-To: apmail-hadoop-hbase-commits-archive@hadoop.apache.org Received: (qmail 89973 invoked by uid 500); 25 Feb 2009 06:00:02 -0000 Mailing-List: contact hbase-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hbase-dev@hadoop.apache.org Delivered-To: mailing list hbase-commits@hadoop.apache.org Received: (qmail 89964 invoked by uid 99); 25 Feb 2009 06:00:02 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 24 Feb 2009 22:00:02 -0800 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED,OBSCURED_EMAIL X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 25 Feb 2009 05:59:50 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 0DFED2388B31; Wed, 25 Feb 2009 05:59:29 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r747672 [3/4] - in /hadoop/hbase/trunk: conf/ src/java/org/apache/hadoop/hbase/io/ src/java/org/apache/hadoop/hbase/io/hfile/ src/java/org/apache/hadoop/hbase/regionserver/ src/test/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/... Date: Wed, 25 Feb 2009 05:59:27 -0000 To: hbase-commits@hadoop.apache.org From: stack@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090225055929.0DFED2388B31@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (added) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,336 @@ +/** + * Copyright 2008 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HStoreKey; +import org.apache.hadoop.hbase.filter.RowFilterInterface; +import org.apache.hadoop.hbase.io.Cell; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Scanner scans both the memcache and the HStore + */ +class StoreScanner implements InternalScanner, ChangedReadersObserver { + static final Log LOG = LogFactory.getLog(StoreScanner.class); + + private InternalScanner[] scanners; + private TreeMap[] resultSets; + private HStoreKey[] keys; + private boolean wildcardMatch = false; + private boolean multipleMatchers = false; + private RowFilterInterface dataFilter; + private Store store; + private final long timestamp; + private final byte [][] targetCols; + + // Indices for memcache scanner and hstorefile scanner. + private static final int MEMS_INDEX = 0; + private static final int HSFS_INDEX = MEMS_INDEX + 1; + + // Used around transition from no storefile to the first. + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + // Used to indicate that the scanner has closed (see HBASE-1107) + private final AtomicBoolean closing = new AtomicBoolean(false); + + /** Create an Scanner with a handle on the memcache and HStore files. */ + @SuppressWarnings("unchecked") + StoreScanner(Store store, byte [][] targetCols, byte [] firstRow, + long timestamp, RowFilterInterface filter) + throws IOException { + this.store = store; + this.dataFilter = filter; + if (null != dataFilter) { + dataFilter.reset(); + } + this.scanners = new InternalScanner[2]; + this.resultSets = new TreeMap[scanners.length]; + this.keys = new HStoreKey[scanners.length]; + // Save these args in case we need them later handling change in readers + // See updateReaders below. + this.timestamp = timestamp; + this.targetCols = targetCols; + try { + scanners[MEMS_INDEX] = + store.memcache.getScanner(timestamp, targetCols, firstRow); + scanners[HSFS_INDEX] = + new StoreFileScanner(store, timestamp, targetCols, firstRow); + for (int i = MEMS_INDEX; i < scanners.length; i++) { + checkScannerFlags(i); + } + } catch (IOException e) { + doClose(); + throw e; + } + + // Advance to the first key in each scanner. + // All results will match the required column-set and scanTime. + for (int i = MEMS_INDEX; i < scanners.length; i++) { + setupScanner(i); + } + + this.store.addChangedReaderObserver(this); + } + + /* + * @param i Index. + */ + private void checkScannerFlags(final int i) { + if (this.scanners[i].isWildcardScanner()) { + this.wildcardMatch = true; + } + if (this.scanners[i].isMultipleMatchScanner()) { + this.multipleMatchers = true; + } + } + + /* + * Do scanner setup. 
+ * @param i + * @throws IOException + */ + private void setupScanner(final int i) throws IOException { + this.keys[i] = new HStoreKey(); + this.resultSets[i] = new TreeMap(Bytes.BYTES_COMPARATOR); + if (this.scanners[i] != null && !this.scanners[i].next(this.keys[i], + this.resultSets[i])) { + closeScanner(i); + } + } + + /** @return true if the scanner is a wild card scanner */ + public boolean isWildcardScanner() { + return this.wildcardMatch; + } + + /** @return true if the scanner is a multiple match scanner */ + public boolean isMultipleMatchScanner() { + return this.multipleMatchers; + } + + public boolean next(HStoreKey key, SortedMap results) + throws IOException { + this.lock.readLock().lock(); + try { + // Filtered flag is set by filters. If a cell has been 'filtered out' + // -- i.e. it is not to be returned to the caller -- the flag is 'true'. + boolean filtered = true; + boolean moreToFollow = true; + while (filtered && moreToFollow) { + // Find the lowest-possible key. + byte [] chosenRow = null; + long chosenTimestamp = -1; + for (int i = 0; i < this.keys.length; i++) { + if (scanners[i] != null && + (chosenRow == null || + (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) < 0) || + ((HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) == 0) && + (keys[i].getTimestamp() > chosenTimestamp)))) { + chosenRow = keys[i].getRow(); + chosenTimestamp = keys[i].getTimestamp(); + } + } + + // Filter whole row by row key? + filtered = dataFilter != null? dataFilter.filterRowKey(chosenRow) : false; + + // Store the key and results for each sub-scanner. Merge them as + // appropriate. + if (chosenTimestamp >= 0 && !filtered) { + // Here we are setting the passed in key with current row+timestamp + key.setRow(chosenRow); + key.setVersion(chosenTimestamp); + key.setColumn(HConstants.EMPTY_BYTE_ARRAY); + // Keep list of deleted cell keys within this row. We need this + // because as we go through scanners, the delete record may be in an + // early scanner and then the same record with a non-delete, non-null + // value in a later. Without history of what we've seen, we'll return + // deleted values. This List should not ever grow too large since we + // are only keeping rows and columns that match those set on the + // scanner and which have delete values. If memory usage becomes a + // problem, could redo as bloom filter. + Set deletes = new HashSet(); + for (int i = 0; i < scanners.length && !filtered; i++) { + while ((scanners[i] != null + && !filtered + && moreToFollow) + && (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) == 0)) { + // If we are doing a wild card match or there are multiple + // matchers per column, we need to scan all the older versions of + // this row to pick up the rest of the family members + if (!wildcardMatch + && !multipleMatchers + && (keys[i].getTimestamp() != chosenTimestamp)) { + break; + } + + // NOTE: We used to do results.putAll(resultSets[i]); + // but this had the effect of overwriting newer + // values with older ones. So now we only insert + // a result if the map does not contain the key. + HStoreKey hsk = new HStoreKey(key.getRow(), + HConstants.EMPTY_BYTE_ARRAY, + key.getTimestamp()); + for (Map.Entry e : resultSets[i].entrySet()) { + hsk.setColumn(e.getKey()); + if (HLogEdit.isDeleted(e.getValue().getValue())) { + // Only first key encountered is added; deletes is a Set. 
+ deletes.add(new HStoreKey(hsk)); + } else if (!deletes.contains(hsk) && + !filtered && + moreToFollow && + !results.containsKey(e.getKey())) { + if (dataFilter != null) { + // Filter whole row by column data? + filtered = dataFilter.filterColumn(chosenRow, e.getKey(), + e.getValue().getValue()); + if (filtered) { + results.clear(); + break; + } + } + results.put(e.getKey(), e.getValue()); + } + } + resultSets[i].clear(); + if (!scanners[i].next(keys[i], resultSets[i])) { + closeScanner(i); + } + } + } + } + for (int i = 0; i < scanners.length; i++) { + // If the current scanner is non-null AND has a lower-or-equal + // row label, then its timestamp is bad. We need to advance it. + while ((scanners[i] != null) && + (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) <= 0)) { + resultSets[i].clear(); + if (!scanners[i].next(keys[i], resultSets[i])) { + closeScanner(i); + } + } + } + + moreToFollow = chosenTimestamp >= 0; + if (dataFilter != null) { + if (dataFilter.filterAllRemaining()) { + moreToFollow = false; + } + } + + if (results.size() <= 0 && !filtered) { + // There were no results found for this row. Marked it as + // 'filtered'-out otherwise we will not move on to the next row. + filtered = true; + } + } + + // If we got no results, then there is no more to follow. + if (results == null || results.size() <= 0) { + moreToFollow = false; + } + + // Make sure scanners closed if no more results + if (!moreToFollow) { + for (int i = 0; i < scanners.length; i++) { + if (null != scanners[i]) { + closeScanner(i); + } + } + } + + return moreToFollow; + } finally { + this.lock.readLock().unlock(); + } + } + + /** Shut down a single scanner */ + void closeScanner(int i) { + try { + try { + scanners[i].close(); + } catch (IOException e) { + LOG.warn(store.storeName + " failed closing scanner " + i, e); + } + } finally { + scanners[i] = null; + keys[i] = null; + resultSets[i] = null; + } + } + + public void close() { + this.closing.set(true); + this.store.deleteChangedReaderObserver(this); + doClose(); + } + + private void doClose() { + for (int i = MEMS_INDEX; i < scanners.length; i++) { + if (scanners[i] != null) { + closeScanner(i); + } + } + } + + // Implementation of ChangedReadersObserver + + public void updateReaders() throws IOException { + if (this.closing.get()) { + return; + } + this.lock.writeLock().lock(); + try { + Map map = this.store.getStorefiles(); + if (this.scanners[HSFS_INDEX] == null && map != null && map.size() > 0) { + // Presume that we went from no readers to at least one -- need to put + // a HStoreScanner in place. + try { + // I think its safe getting key from mem at this stage -- it shouldn't have + // been flushed yet + this.scanners[HSFS_INDEX] = new StoreFileScanner(this.store, + this.timestamp, this. 
targetCols, this.keys[MEMS_INDEX].getRow()); + checkScannerFlags(HSFS_INDEX); + setupScanner(HSFS_INDEX); + LOG.debug("Added a StoreFileScanner to outstanding HStoreScanner"); + } catch (IOException e) { + doClose(); + throw e; + } + } + } finally { + this.lock.writeLock().unlock(); + } + } +} \ No newline at end of file Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,365 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.math.random.RandomData; +import org.apache.commons.math.random.RandomDataImpl; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.util.Bytes; + +/** + *
+ * This class runs performance benchmarks for {@link HFile}. + *
+ */ +public class HFilePerformanceEvaluation { + + private static final int ROW_LENGTH = 10; + private static final int ROW_COUNT = 1000000; + private static final int RFILE_BLOCKSIZE = 8 * 1024; + + static final Log LOG = + LogFactory.getLog(HFilePerformanceEvaluation.class.getName()); + + static byte [] format(final int i) { + String v = Integer.toString(i); + return Bytes.toBytes("0000000000".substring(v.length()) + v); + } + + static ImmutableBytesWritable format(final int i, ImmutableBytesWritable w) { + w.set(format(i)); + return w; + } + + private void runBenchmarks() throws Exception { + final Configuration conf = new Configuration(); + final FileSystem fs = FileSystem.get(conf); + final Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile")); + if (fs.exists(mf)) { + fs.delete(mf, true); + } + + runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + PerformanceEvaluationCommons.concurrentReads(new Runnable() { + public void run() { + try { + runBenchmark(new UniformRandomSmallScan(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + PerformanceEvaluationCommons.concurrentReads(new Runnable() { + public void run() { + try { + runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + PerformanceEvaluationCommons.concurrentReads(new Runnable() { + public void run() { + try { + runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + PerformanceEvaluationCommons.concurrentReads(new Runnable() { + public void run() { + try { + runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + + } + + private void runBenchmark(RowOrientedBenchmark benchmark, int rowCount) + throws Exception { + LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " + + rowCount + " rows."); + long elapsedTime = benchmark.run(); + LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " + + rowCount + " rows took " + elapsedTime + "ms."); + } + + static abstract class RowOrientedBenchmark { + + protected final Configuration conf; + protected final FileSystem fs; + protected final Path mf; + protected final int totalRows; + + public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf, + int totalRows) { + this.conf = conf; + this.fs = fs; + this.mf = mf; + this.totalRows = totalRows; + } + + void setUp() throws Exception { + // do nothing + } + + abstract void doRow(int i) throws Exception; + + protected int getReportingPeriod() { + return this.totalRows / 10; + } + + void tearDown() throws Exception { + // do nothing + } + + /** + * Run benchmark + * @return elapsed time. 
+ * @throws Exception + */ + long run() throws Exception { + long elapsedTime; + setUp(); + long startTime = System.currentTimeMillis(); + try { + for (int i = 0; i < totalRows; i++) { + if (i > 0 && i % getReportingPeriod() == 0) { + LOG.info("Processed " + i + " rows."); + } + doRow(i); + } + elapsedTime = System.currentTimeMillis() - startTime; + } finally { + tearDown(); + } + return elapsedTime; + } + + } + + static class SequentialWriteBenchmark extends RowOrientedBenchmark { + protected HFile.Writer writer; + private Random random = new Random(); + private byte[] bytes = new byte[ROW_LENGTH]; + + public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf, + int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void setUp() throws Exception { + writer = new HFile.Writer(this.fs, this.mf, RFILE_BLOCKSIZE, null, null); + } + + @Override + void doRow(int i) throws Exception { + writer.append(format(i), generateValue()); + } + + private byte[] generateValue() { + random.nextBytes(bytes); + return bytes; + } + + @Override + protected int getReportingPeriod() { + return this.totalRows; // don't report progress + } + + @Override + void tearDown() throws Exception { + writer.close(); + } + + } + + static abstract class ReadBenchmark extends RowOrientedBenchmark { + ImmutableBytesWritable key = new ImmutableBytesWritable(); + ImmutableBytesWritable value = new ImmutableBytesWritable(); + + protected HFile.Reader reader; + + public ReadBenchmark(Configuration conf, FileSystem fs, Path mf, + int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void setUp() throws Exception { + reader = new HFile.Reader(this.fs, this.mf, null); + this.reader.loadFileInfo(); + } + + @Override + void tearDown() throws Exception { + reader.close(); + } + + } + + static class SequentialReadBenchmark extends ReadBenchmark { + private HFileScanner scanner; + + public SequentialReadBenchmark(Configuration conf, FileSystem fs, + Path mf, int totalRows) + throws IOException { + super(conf, fs, mf, totalRows); + } + + @Override + void setUp() throws Exception { + super.setUp(); + this.scanner = this.reader.getScanner(); + this.scanner.seekTo(); + } + + @Override + void doRow(@SuppressWarnings("unused") int i) throws Exception { + if (this.scanner.next()) { + ByteBuffer k = this.scanner.getKey(); + PerformanceEvaluationCommons.assertKey(format(i + 1), k); + ByteBuffer v = scanner.getValue(); + PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH); + } + } + + @Override + protected int getReportingPeriod() { + return this.totalRows; // don't report progress + } + + } + + static class UniformRandomReadBenchmark extends ReadBenchmark { + + private Random random = new Random(); + + public UniformRandomReadBenchmark(Configuration conf, FileSystem fs, + Path mf, int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void doRow(@SuppressWarnings("unused") int i) throws Exception { + HFileScanner scanner = this.reader.getScanner(); + byte [] b = getRandomRow(); + scanner.seekTo(b); + ByteBuffer k = scanner.getKey(); + PerformanceEvaluationCommons.assertKey(b, k); + ByteBuffer v = scanner.getValue(); + PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH); + } + + private byte [] getRandomRow() { + return format(random.nextInt(totalRows)); + } + } + + static class UniformRandomSmallScan extends ReadBenchmark { + private Random random = new Random(); + + public UniformRandomSmallScan(Configuration conf, FileSystem fs, + Path mf, int 
totalRows) { + super(conf, fs, mf, totalRows/10); + } + + @Override + void doRow(@SuppressWarnings("unused") int i) throws Exception { + HFileScanner scanner = this.reader.getScanner(); + byte [] b = getRandomRow(); + if (scanner.seekTo(b) != 0) { + System.out.println("Nonexistent row: " + new String(b)); + return; + } + ByteBuffer k = scanner.getKey(); + PerformanceEvaluationCommons.assertKey(b, k); + // System.out.println("Found row: " + new String(b)); + for (int ii = 0; ii < 30; ii++) { + if (!scanner.next()) { + System.out.println("NOTHING FOLLOWS"); + } + ByteBuffer v = scanner.getValue(); + PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH); + } + } + + private byte [] getRandomRow() { + return format(random.nextInt(totalRows)); + } + } + + static class GaussianRandomReadBenchmark extends ReadBenchmark { + + private RandomData randomData = new RandomDataImpl(); + + public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs, + Path mf, int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void doRow(@SuppressWarnings("unused") int i) throws Exception { + HFileScanner scanner = this.reader.getScanner(); + scanner.seekTo(getGaussianRandomRowBytes()); + for (int ii = 0; ii < 30; ii++) { + if (!scanner.next()) { + System.out.println("NOTHING FOLLOWS"); + } + scanner.getKey(); + scanner.getValue(); + } + } + + private byte [] getGaussianRandomRowBytes() { + int r = (int) randomData.nextGaussian(totalRows / 2, totalRows / 10); + return format(r); + } + } + + /** + * @param args + * @throws IOException + */ + public static void main(String[] args) throws Exception { + new HFilePerformanceEvaluation().runBenchmarks(); + } +} \ No newline at end of file Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,78 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + + +/** + * Code shared by PE tests. 
+ */ +public class PerformanceEvaluationCommons { + static final Log LOG = + LogFactory.getLog(PerformanceEvaluationCommons.class.getName()); + + public static void assertValueSize(final int expectedSize, final int got) { + if (got != expectedSize) { + throw new AssertionError("Expected " + expectedSize + " but got " + got); + } + } + + public static void assertKey(final byte [] expected, final ByteBuffer got) { + byte [] b = new byte[got.limit()]; + got.get(b, 0, got.limit()); + assertKey(expected, b); + } + + public static void assertKey(final byte [] expected, final byte [] got) { + if (!org.apache.hadoop.hbase.util.Bytes.equals(expected, got)) { + throw new AssertionError("Expected " + + org.apache.hadoop.hbase.util.Bytes.toString(expected) + + " but got " + org.apache.hadoop.hbase.util.Bytes.toString(got)); + } + } + + public static void concurrentReads(final Runnable r) { + final int count = 1; + long now = System.currentTimeMillis(); + List threads = new ArrayList(count); + for (int i = 0; i < count; i++) { + Thread t = new Thread(r); + t.setName("" + i); + threads.add(t); + } + for (Thread t: threads) { + t.start(); + } + for (Thread t: threads) { + try { + t.join(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + LOG.info("Test took " + (System.currentTimeMillis() - now)); + } +} \ No newline at end of file Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import junit.framework.TestCase; + +import org.apache.hadoop.hbase.HStoreKey.StoreKeyByteComparator; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Writables; + +/** + * Tests for the HStoreKey Plain and Meta RawComparators. 
+ */ +public class TestHStoreKey extends TestCase { + protected void setUp() throws Exception { + super.setUp(); + } + + protected void tearDown() throws Exception { + super.tearDown(); + } + + public void testByteBuffer() throws Exception { + final long ts = 123; + final byte [] row = Bytes.toBytes("row"); + final byte [] column = Bytes.toBytes("column"); + HStoreKey hsk = new HStoreKey(row, column, ts); + ByteBuffer bb = ByteBuffer.wrap(hsk.getBytes()); + assertTrue(Bytes.equals(row, HStoreKey.getRow(bb))); + assertTrue(Bytes.equals(column, HStoreKey.getColumn(bb))); + assertEquals(ts, HStoreKey.getTimestamp(bb)); + } + + /** + * Test the byte comparator works same as the object comparator. + */ + public void testRawComparator() throws IOException { + long timestamp = System.currentTimeMillis(); + byte [] a = Bytes.toBytes("a"); + HStoreKey past = new HStoreKey(a, a, timestamp - 10); + byte [] pastBytes = Writables.getBytes(past); + HStoreKey now = new HStoreKey(a, a, timestamp); + byte [] nowBytes = Writables.getBytes(now); + HStoreKey future = new HStoreKey(a, a, timestamp + 10); + byte [] futureBytes = Writables.getBytes(future); + StoreKeyByteComparator comparator = new HStoreKey.StoreKeyByteComparator(); + assertTrue(past.compareTo(now) > 0); + assertTrue(comparator.compare(pastBytes, nowBytes) > 0); + assertTrue(now.compareTo(now) == 0); + assertTrue(comparator.compare(nowBytes, nowBytes) == 0); + assertTrue(future.compareTo(now) < 0); + assertTrue(comparator.compare(futureBytes, nowBytes) < 0); + // Check that empty column comes before one with a column + HStoreKey nocolumn = new HStoreKey(a, timestamp); + byte [] nocolumnBytes = Writables.getBytes(nocolumn); + HStoreKey withcolumn = new HStoreKey(a, a, timestamp); + byte [] withcolumnBytes = Writables.getBytes(withcolumn); + assertTrue(nocolumn.compareTo(withcolumn) < 0); + assertTrue(comparator.compare(nocolumnBytes, withcolumnBytes) < 0); + // Check that empty column comes and LATEST comes before one with a column + // and old timestamp. + nocolumn = new HStoreKey(a, HConstants.LATEST_TIMESTAMP); + nocolumnBytes = Writables.getBytes(nocolumn); + withcolumn = new HStoreKey(a, a, timestamp); + withcolumnBytes = Writables.getBytes(withcolumn); + assertTrue(nocolumn.compareTo(withcolumn) < 0); + assertTrue(comparator.compare(nocolumnBytes, withcolumnBytes) < 0); + } + +// /** +// * Tests cases where rows keys have characters below the ','. 
+// * See HBASE-832 +// * @throws IOException +// */ +// public void testHStoreKeyBorderCases() throws IOException { +// HRegionInfo info = new HRegionInfo(new HTableDescriptor("testtable"), +// HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY); +// +// HStoreKey rowA = new HStoreKey("testtable,www.hbase.org/,1234", +// "", Long.MAX_VALUE, info); +// byte [] rowABytes = Writables.getBytes(rowA); +// HStoreKey rowB = new HStoreKey("testtable,www.hbase.org/%20,99999", +// "", Long.MAX_VALUE, info); +// byte [] rowBBytes = Writables.getBytes(rowB); +// assertTrue(rowA.compareTo(rowB) > 0); +// HStoreKey.Comparator comparator = new HStoreKey.PlainStoreKeyComparator(); +// assertTrue(comparator.compare(rowABytes, rowBBytes) > 0); +// +// rowA = new HStoreKey("testtable,www.hbase.org/,1234", +// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO); +// rowB = new HStoreKey("testtable,www.hbase.org/%20,99999", +// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO); +// assertTrue(rowA.compareTo(rowB) < 0); +// assertTrue(comparator.compare(rowABytes, rowBBytes) < 0); +// +// rowA = new HStoreKey("testtable,,1234", +// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO); +// rowB = new HStoreKey("testtable,$www.hbase.org/,99999", +// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO); +// assertTrue(rowA.compareTo(rowB) < 0); +// assertTrue(comparator.compare(rowABytes, rowBBytes) < 0); +// +// rowA = new HStoreKey(".META.,testtable,www.hbase.org/,1234,4321", +// "", Long.MAX_VALUE, HRegionInfo.ROOT_REGIONINFO); +// rowB = new HStoreKey(".META.,testtable,www.hbase.org/%20,99999,99999", +// "", Long.MAX_VALUE, HRegionInfo.ROOT_REGIONINFO); +// assertTrue(rowA.compareTo(rowB) > 0); +// assertTrue(comparator.compare(rowABytes, rowBBytes) > 0); +// } +} \ No newline at end of file Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.util.Random; + +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.WritableComparator; + +/** + * Generate random pairs. + *
+ * Copied from + * hadoop-3315 tfile. + * Remove after tfile is committed and use the tfile version of this class + * instead.
+ */ +class KVGenerator { + private final Random random; + private final byte[][] dict; + private final boolean sorted; + private final RandomDistribution.DiscreteRNG keyLenRNG, valLenRNG; + private BytesWritable lastKey; + private static final int MIN_KEY_LEN = 4; + private final byte prefix[] = new byte[MIN_KEY_LEN]; + + public KVGenerator(Random random, boolean sorted, + RandomDistribution.DiscreteRNG keyLenRNG, + RandomDistribution.DiscreteRNG valLenRNG, + RandomDistribution.DiscreteRNG wordLenRNG, int dictSize) { + this.random = random; + dict = new byte[dictSize][]; + this.sorted = sorted; + this.keyLenRNG = keyLenRNG; + this.valLenRNG = valLenRNG; + for (int i = 0; i < dictSize; ++i) { + int wordLen = wordLenRNG.nextInt(); + dict[i] = new byte[wordLen]; + random.nextBytes(dict[i]); + } + lastKey = new BytesWritable(); + fillKey(lastKey); + } + + private void fillKey(BytesWritable o) { + int len = keyLenRNG.nextInt(); + if (len < MIN_KEY_LEN) len = MIN_KEY_LEN; + o.setSize(len); + int n = MIN_KEY_LEN; + while (n < len) { + byte[] word = dict[random.nextInt(dict.length)]; + int l = Math.min(word.length, len - n); + System.arraycopy(word, 0, o.get(), n, l); + n += l; + } + if (sorted + && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN, lastKey + .getSize() + - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0) { + incrementPrefix(); + } + + System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN); + lastKey.set(o); + } + + private void fillValue(BytesWritable o) { + int len = valLenRNG.nextInt(); + o.setSize(len); + int n = 0; + while (n < len) { + byte[] word = dict[random.nextInt(dict.length)]; + int l = Math.min(word.length, len - n); + System.arraycopy(word, 0, o.get(), n, l); + n += l; + } + } + + private void incrementPrefix() { + for (int i = MIN_KEY_LEN - 1; i >= 0; --i) { + ++prefix[i]; + if (prefix[i] != 0) return; + } + + throw new RuntimeException("Prefix overflown"); + } + + public void next(BytesWritable key, BytesWritable value, boolean dupKey) { + if (dupKey) { + key.set(lastKey); + } + else { + fillKey(key); + } + fillValue(value); + } +} Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Random; + +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.hbase.io.hfile.RandomDistribution.DiscreteRNG; + +/* +*
+* Copied from +* hadoop-3315 tfile. +* Remove after tfile is committed and use the tfile version of this class +* instead.
+*/ +class KeySampler { + Random random; + int min, max; + DiscreteRNG keyLenRNG; + private static final int MIN_KEY_LEN = 4; + + public KeySampler(Random random, byte [] first, byte [] last, + DiscreteRNG keyLenRNG) throws IOException { + this.random = random; + min = keyPrefixToInt(first); + max = keyPrefixToInt(last); + this.keyLenRNG = keyLenRNG; + } + + private int keyPrefixToInt(byte [] key) throws IOException { + byte[] b = key; + int o = 0; + return (b[o] & 0xff) << 24 | (b[o + 1] & 0xff) << 16 + | (b[o + 2] & 0xff) << 8 | (b[o + 3] & 0xff); + } + + public void next(BytesWritable key) { + key.setSize(Math.max(MIN_KEY_LEN, keyLenRNG.nextInt())); + random.nextBytes(key.get()); + int n = random.nextInt(max - min) + min; + byte[] b = key.get(); + b[0] = (byte) (n >> 24); + b[1] = (byte) (n >> 16); + b[2] = (byte) (n >> 8); + b[3] = (byte) n; + } +} Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +/** + * A nano-second timer. + *
+ * Copied from + * hadoop-3315 tfile. + * Remove after tfile is committed and use the tfile version of this class + * instead.
+ */ +public class NanoTimer { + private long last = -1; + private boolean started = false; + private long cumulate = 0; + + /** + * Constructor + * + * @param start + * Start the timer upon construction. + */ + public NanoTimer(boolean start) { + if (start) this.start(); + } + + /** + * Start the timer. + * + * Note: No effect if timer is already started. + */ + public void start() { + if (!this.started) { + this.last = System.nanoTime(); + this.started = true; + } + } + + /** + * Stop the timer. + * + * Note: No effect if timer is already stopped. + */ + public void stop() { + if (this.started) { + this.started = false; + this.cumulate += System.nanoTime() - this.last; + } + } + + /** + * Read the timer. + * + * @return the elapsed time in nano-seconds. Note: If the timer is never + * started before, -1 is returned. + */ + public long read() { + if (!readable()) return -1; + + return this.cumulate; + } + + /** + * Reset the timer. + */ + public void reset() { + this.last = -1; + this.started = false; + this.cumulate = 0; + } + + /** + * Checking whether the timer is started + * + * @return true if timer is started. + */ + public boolean isStarted() { + return this.started; + } + + /** + * Format the elapsed time to a human understandable string. + * + * Note: If timer is never started, "ERR" will be returned. + */ + public String toString() { + if (!readable()) { + return "ERR"; + } + + return NanoTimer.nanoTimeToString(this.cumulate); + } + + /** + * A utility method to format a time duration in nano seconds into a human + * understandable stirng. + * + * @param t + * Time duration in nano seconds. + * @return String representation. + */ + public static String nanoTimeToString(long t) { + if (t < 0) return "ERR"; + + if (t == 0) return "0"; + + if (t < 1000) { + return t + "ns"; + } + + double us = (double) t / 1000; + if (us < 1000) { + return String.format("%.2fus", us); + } + + double ms = us / 1000; + if (ms < 1000) { + return String.format("%.2fms", ms); + } + + double ss = ms / 1000; + if (ss < 1000) { + return String.format("%.2fs", ss); + } + + long mm = (long) ss / 60; + ss -= mm * 60; + long hh = mm / 60; + mm -= hh * 60; + long dd = hh / 24; + hh -= dd * 24; + + if (dd > 0) { + return String.format("%dd%dh", dd, hh); + } + + if (hh > 0) { + return String.format("%dh%dm", hh, mm); + } + + if (mm > 0) { + return String.format("%dm%.1fs", mm, ss); + } + + return String.format("%.2fs", ss); + + /** + * StringBuilder sb = new StringBuilder(); String sep = ""; + * + * if (dd > 0) { String unit = (dd > 1) ? "days" : "day"; + * sb.append(String.format("%s%d%s", sep, dd, unit)); sep = " "; } + * + * if (hh > 0) { String unit = (hh > 1) ? "hrs" : "hr"; + * sb.append(String.format("%s%d%s", sep, hh, unit)); sep = " "; } + * + * if (mm > 0) { String unit = (mm > 1) ? "mins" : "min"; + * sb.append(String.format("%s%d%s", sep, mm, unit)); sep = " "; } + * + * if (ss > 0) { String unit = (ss > 1) ? "secs" : "sec"; + * sb.append(String.format("%s%.3f%s", sep, ss, unit)); sep = " "; } + * + * return sb.toString(); + */ + } + + private boolean readable() { + return this.last != -1; + } + + /** + * Simple tester. 
+ * + * @param args + */ + public static void main(String[] args) { + long i = 7; + + for (int x = 0; x < 20; ++x, i *= 7) { + System.out.println(NanoTimer.nanoTimeToString(i)); + } + } +} + Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,271 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Random; + +/** + * A class that generates random numbers that follow some distribution. + *
+ * Copied from + * hadoop-3315 tfile. + * Remove after tfile is committed and use the tfile version of this class + * instead.
+ */ +public class RandomDistribution { + /** + * Interface for discrete (integer) random distributions. + */ + public static interface DiscreteRNG { + /** + * Get the next random number + * + * @return the next random number. + */ + public int nextInt(); + } + + /** + * P(i)=1/(max-min) + */ + public static final class Flat implements DiscreteRNG { + private final Random random; + private final int min; + private final int max; + + /** + * Generate random integers from min (inclusive) to max (exclusive) + * following even distribution. + * + * @param random + * The basic random number generator. + * @param min + * Minimum integer + * @param max + * maximum integer (exclusive). + * + */ + public Flat(Random random, int min, int max) { + if (min >= max) { + throw new IllegalArgumentException("Invalid range"); + } + this.random = random; + this.min = min; + this.max = max; + } + + /** + * @see DiscreteRNG#nextInt() + */ + @Override + public int nextInt() { + return random.nextInt(max - min) + min; + } + } + + /** + * Zipf distribution. The ratio of the probabilities of integer i and j is + * defined as follows: + * + * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma. + */ + public static final class Zipf implements DiscreteRNG { + private static final double DEFAULT_EPSILON = 0.001; + private final Random random; + private final ArrayList k; + private final ArrayList v; + + /** + * Constructor + * + * @param r + * The random number generator. + * @param min + * minimum integer (inclusvie) + * @param max + * maximum integer (exclusive) + * @param sigma + * parameter sigma. (sigma > 1.0) + */ + public Zipf(Random r, int min, int max, double sigma) { + this(r, min, max, sigma, DEFAULT_EPSILON); + } + + /** + * Constructor. + * + * @param r + * The random number generator. + * @param min + * minimum integer (inclusvie) + * @param max + * maximum integer (exclusive) + * @param sigma + * parameter sigma. (sigma > 1.0) + * @param epsilon + * Allowable error percentage (0 < epsilon < 1.0). + */ + public Zipf(Random r, int min, int max, double sigma, double epsilon) { + if ((max <= min) || (sigma <= 1) || (epsilon <= 0) + || (epsilon >= 0.5)) { + throw new IllegalArgumentException("Invalid arguments"); + } + random = r; + k = new ArrayList(); + v = new ArrayList(); + + double sum = 0; + int last = -1; + for (int i = min; i < max; ++i) { + sum += Math.exp(-sigma * Math.log(i - min + 1)); + if ((last == -1) || i * (1 - epsilon) > last) { + k.add(i); + v.add(sum); + last = i; + } + } + + if (last != max - 1) { + k.add(max - 1); + v.add(sum); + } + + v.set(v.size() - 1, 1.0); + + for (int i = v.size() - 2; i >= 0; --i) { + v.set(i, v.get(i) / sum); + } + } + + /** + * @see DiscreteRNG#nextInt() + */ + @Override + public int nextInt() { + double d = random.nextDouble(); + int idx = Collections.binarySearch(v, d); + + if (idx > 0) { + ++idx; + } + else { + idx = -(idx + 1); + } + + if (idx >= v.size()) { + idx = v.size() - 1; + } + + if (idx == 0) { + return k.get(0); + } + + int ceiling = k.get(idx); + int lower = k.get(idx - 1); + + return ceiling - random.nextInt(ceiling - lower); + } + } + + /** + * Binomial distribution. 
+ * + * P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n) + * + * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1) + */ + public static final class Binomial implements DiscreteRNG { + private final Random random; + private final int min; + private final int n; + private final double[] v; + + private static double select(int n, int k) { + double ret = 1.0; + for (int i = k + 1; i <= n; ++i) { + ret *= (double) i / (i - k); + } + return ret; + } + + private static double power(double p, int k) { + return Math.exp(k * Math.log(p)); + } + + /** + * Generate random integers from min (inclusive) to max (exclusive) + * following Binomial distribution. + * + * @param random + * The basic random number generator. + * @param min + * Minimum integer + * @param max + * maximum integer (exclusive). + * @param p + * parameter. + * + */ + public Binomial(Random random, int min, int max, double p) { + if (min >= max) { + throw new IllegalArgumentException("Invalid range"); + } + this.random = random; + this.min = min; + this.n = max - min - 1; + if (n > 0) { + v = new double[n + 1]; + double sum = 0.0; + for (int i = 0; i <= n; ++i) { + sum += select(n, i) * power(p, i) * power(1 - p, n - i); + v[i] = sum; + } + for (int i = 0; i <= n; ++i) { + v[i] /= sum; + } + } + else { + v = null; + } + } + + /** + * @see DiscreteRNG#nextInt() + */ + @Override + public int nextInt() { + if (v == null) { + return min; + } + double d = random.nextDouble(); + int idx = Arrays.binarySearch(v, d); + if (idx > 0) { + ++idx; + } else { + idx = -(idx + 1); + } + + if (idx >= v.length) { + idx = v.length - 1; + } + return idx + min; + } + } +} Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,124 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.hbase.io.hfile.HFile.Reader; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Random seek test. + */ +public class RandomSeek { + private static List slurp(String fname) throws IOException { + BufferedReader istream = new BufferedReader(new FileReader(fname)); + String str; + List l = new ArrayList(); + while ( (str=istream.readLine()) != null) { + String [] parts = str.split(","); + l.add(parts[0] + ":" + parts[1] + ":" + parts[2]); + } + return l; + } + private static String randKey(List keys) { + Random r = new Random(); + //return keys.get(r.nextInt(keys.size())); + return "2" + Integer.toString(7+r.nextInt(2)) + Integer.toString(r.nextInt(100)); + //return new String(r.nextInt(100)); + } + + public static void main(String [] argv) throws IOException { + Configuration conf = new Configuration(); + conf.setInt("io.file.buffer.size", 64*1024); + RawLocalFileSystem rlfs = new RawLocalFileSystem(); + rlfs.setConf(conf); + LocalFileSystem lfs = new LocalFileSystem(rlfs); + + Path path = new Path("/Users/ryan/rfile.big.txt"); + long start = System.currentTimeMillis(); + SimpleBlockCache cache = new SimpleBlockCache(); + //LruBlockCache cache = new LruBlockCache(); + Reader reader = new HFile.Reader(lfs, path, cache); + reader.loadFileInfo(); + System.out.println(reader.trailer); + long end = System.currentTimeMillis(); + + System.out.println("Index read time: " + (end - start)); + + List keys = slurp("/Users/ryan/xaa.50k"); + + HFileScanner scanner = reader.getScanner(); + int count; + long totalBytes = 0; + int notFound = 0; + + start = System.nanoTime(); + for(count = 0; count < 500000; ++count) { + String key = randKey(keys); + byte [] bkey = Bytes.toBytes(key); + int res = scanner.seekTo(bkey); + if (res == 0) { + ByteBuffer k = scanner.getKey(); + ByteBuffer v = scanner.getValue(); + totalBytes += k.limit(); + totalBytes += v.limit(); + } else { + ++ notFound; + } + if (res == -1) { + scanner.seekTo(); + } + // Scan for another 1000 rows. 
+ for (int i = 0; i < 1000; ++i) { + if (!scanner.next()) + break; + ByteBuffer k = scanner.getKey(); + ByteBuffer v = scanner.getValue(); + totalBytes += k.limit(); + totalBytes += v.limit(); + } + + if ( count % 1000 == 0 ) { + end = System.nanoTime(); + + System.out.println("Cache block count: " + cache.size() + " dumped: "+ cache.dumps); + //System.out.println("Cache size: " + cache.heapSize()); + double msTime = ((end - start) / 1000000.0); + System.out.println("Seeked: "+ count + " in " + msTime + " (ms) " + + (1000.0 / msTime ) + " seeks/ms " + + (msTime / 1000.0) + " ms/seek"); + + start = System.nanoTime(); + } + } + System.out.println("Total bytes: " + totalBytes + " not found: " + notFound); + } +} Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,248 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; + +import junit.framework.TestCase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.io.hfile.HFile.Reader; +import org.apache.hadoop.hbase.io.hfile.HFile.Writer; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.RawComparator; + +/** + * test hfile features. + *
+ * Copied from + * hadoop-3315 tfile. + * Remove after tfile is committed and use the tfile version of this class + * instead.
+ */ +public class TestHFile extends TestCase { + static final Log LOG = LogFactory.getLog(TestHFile.class); + + private static String ROOT_DIR = + System.getProperty("test.build.data", "/tmp/TestHFile"); + private FileSystem fs; + private Configuration conf; + private final int minBlockSize = 512; + private static String localFormatter = "%010d"; + + @Override + public void setUp() { + conf = new HBaseConfiguration(); + RawLocalFileSystem rawLFS = new RawLocalFileSystem(); + rawLFS.setConf(conf); + fs = new LocalFileSystem(rawLFS); + } + + // write some records into the tfile + // write them twice + private int writeSomeRecords(Writer writer, int start, int n) + throws IOException { + String value = "value"; + for (int i = start; i < (start + n); i++) { + String key = String.format(localFormatter, Integer.valueOf(i)); + writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key)); + } + return (start + n); + } + + private void readAllRecords(HFileScanner scanner) throws IOException { + readAndCheckbytes(scanner, 0, 100); + } + + // read the records and check + private int readAndCheckbytes(HFileScanner scanner, int start, int n) + throws IOException { + String value = "value"; + int i = start; + for (; i < (start + n); i++) { + ByteBuffer key = scanner.getKey(); + ByteBuffer val = scanner.getValue(); + String keyStr = String.format(localFormatter, Integer.valueOf(i)); + String valStr = value + keyStr; + byte [] keyBytes = Bytes.toBytes(key); + assertTrue("bytes for keys do not match " + keyStr + " " + + Bytes.toString(Bytes.toBytes(key)), + Arrays.equals(Bytes.toBytes(keyStr), keyBytes)); + byte [] valBytes = Bytes.toBytes(val); + assertTrue("bytes for vals do not match " + valStr + " " + + Bytes.toString(valBytes), + Arrays.equals(Bytes.toBytes(valStr), valBytes)); + if (!scanner.next()) { + break; + } + } + assertEquals(i, start + n - 1); + return (start + n); + } + + private byte[] getSomeKey(int rowId) { + return String.format(localFormatter, Integer.valueOf(rowId)).getBytes(); + } + + private void writeRecords(Writer writer) throws IOException { + writeSomeRecords(writer, 0, 100); + writer.close(); + } + + private FSDataOutputStream createFSOutput(Path name) throws IOException { + if (fs.exists(name)) fs.delete(name, true); + FSDataOutputStream fout = fs.create(name); + return fout; + } + + /** + * test none codecs + */ + void basicWithSomeCodec(String codec) throws IOException { + Path ncTFile = new Path(ROOT_DIR, "basic.hfile"); + FSDataOutputStream fout = createFSOutput(ncTFile); + Writer writer = new Writer(fout, minBlockSize, codec, null); + LOG.info(writer); + writeRecords(writer); + fout.close(); + FSDataInputStream fin = fs.open(ncTFile); + Reader reader = new Reader(fs.open(ncTFile), + fs.getFileStatus(ncTFile).getLen(), null); + // Load up the index. + reader.loadFileInfo(); + LOG.info(reader); + HFileScanner scanner = reader.getScanner(); + // Align scanner at start of the file. 
+ scanner.seekTo(); + readAllRecords(scanner); + scanner.seekTo(getSomeKey(50)); + assertTrue("location lookup failed", scanner.seekTo(getSomeKey(50)) == 0); + // read the key and see if it matches + ByteBuffer readKey = scanner.getKey(); + assertTrue("seeked key does not match", Arrays.equals(getSomeKey(50), + Bytes.toBytes(readKey))); + + scanner.seekTo(new byte[0]); + ByteBuffer val1 = scanner.getValue(); + scanner.seekTo(new byte[0]); + ByteBuffer val2 = scanner.getValue(); + assertTrue(Arrays.equals(Bytes.toBytes(val1), Bytes.toBytes(val2))); + + reader.close(); + fin.close(); + fs.delete(ncTFile, true); + } + + public void testTFileFeatures() throws IOException { + basicWithSomeCodec("none"); + basicWithSomeCodec("gz"); + } + + private void writeNumMetablocks(Writer writer, int n) { + for (int i = 0; i < n; i++) { + writer.appendMetaBlock("TfileMeta" + i, ("something to test" + i).getBytes()); + } + } + + private void someTestingWithMetaBlock(Writer writer) { + writeNumMetablocks(writer, 10); + } + + private void readNumMetablocks(Reader reader, int n) throws IOException { + for (int i = 0; i < n; i++) { + ByteBuffer b = reader.getMetaBlock("TfileMeta" + i); + byte [] found = Bytes.toBytes(b); + assertTrue("failed to match metadata", Arrays.equals( + ("something to test" + i).getBytes(), found)); + } + } + + private void someReadingWithMetaBlock(Reader reader) throws IOException { + readNumMetablocks(reader, 10); + } + + private void metablocks(final String compress) throws Exception { + Path mFile = new Path(ROOT_DIR, "meta.tfile"); + FSDataOutputStream fout = createFSOutput(mFile); + Writer writer = new Writer(fout, minBlockSize, compress, null); + someTestingWithMetaBlock(writer); + writer.close(); + fout.close(); + FSDataInputStream fin = fs.open(mFile); + Reader reader = new Reader(fs.open(mFile), this.fs.getFileStatus(mFile) + .getLen(), null); + reader.loadFileInfo(); + // No data -- this should return false. + assertFalse(reader.getScanner().seekTo()); + someReadingWithMetaBlock(reader); + fs.delete(mFile, true); + reader.close(); + fin.close(); + } + + // test meta blocks for tfiles + public void testMetaBlocks() throws Exception { + metablocks("none"); + metablocks("gz"); + } + + /** + * Make sure the orginals for our compression libs doesn't change on us. 
+ */ + public void testCompressionOrdinance() { + assertTrue(Compression.Algorithm.LZO.ordinal() == 0); + assertTrue(Compression.Algorithm.GZ.ordinal() == 1); + assertTrue(Compression.Algorithm.NONE.ordinal() == 2); + } + + + public void testComparator() throws IOException { + Path mFile = new Path(ROOT_DIR, "meta.tfile"); + FSDataOutputStream fout = createFSOutput(mFile); + Writer writer = new Writer(fout, minBlockSize, "none", + new RawComparator() { + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, + int l2) { + return -Bytes.compareTo(b1, s1, l1, b2, s2, l2); + + } + @Override + public int compare(byte[] o1, byte[] o2) { + return compare(o1, 0, o1.length, o2, 0, o2.length); + } + }); + writer.append("3".getBytes(), "0".getBytes()); + writer.append("2".getBytes(), "0".getBytes()); + writer.append("1".getBytes(), "0".getBytes()); + writer.close(); + } +} \ No newline at end of file Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java?rev=747672&view=auto ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java (added) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java Wed Feb 25 05:59:26 2009 @@ -0,0 +1,387 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.LzoCodec; + +/** + * Set of long-running tests to measure performance of HFile. + *

+ * Copied from the TFile test in HADOOP-3315. + * Remove this class once TFile is committed and use the TFile version + * instead.

+ */ +public class TestHFilePerformance extends TestCase { + private static String ROOT_DIR = + System.getProperty("test.build.data", "/tmp/TestHFilePerformance"); + private FileSystem fs; + private Configuration conf; + private long startTimeEpoch; + private long finishTimeEpoch; + private DateFormat formatter; + + @Override + public void setUp() throws IOException { + conf = new Configuration(); + fs = FileSystem.get(conf); + formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + } + + public void startTime() { + startTimeEpoch = System.currentTimeMillis(); + System.out.println(formatTime() + " Started timing."); + } + + public void stopTime() { + finishTimeEpoch = System.currentTimeMillis(); + System.out.println(formatTime() + " Stopped timing."); + } + + public long getIntervalMillis() { + return finishTimeEpoch - startTimeEpoch; + } + + public void printlnWithTimestamp(String message) { + System.out.println(formatTime() + " " + message); + } + + /* + * Format millis into minutes and seconds. + */ + public String formatTime(long milis){ + return formatter.format(milis); + } + + public String formatTime(){ + return formatTime(System.currentTimeMillis()); + } + + private FSDataOutputStream createFSOutput(Path name) throws IOException { + if (fs.exists(name)) + fs.delete(name, true); + FSDataOutputStream fout = fs.create(name); + return fout; + } + + //TODO have multiple ways of generating key/value e.g. dictionary words + //TODO to have a sample compressable data, for now, made 1 out of 3 values random + // keys are all random. + + private static class KeyValueGenerator { + Random keyRandomizer; + Random valueRandomizer; + long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random. + long valueSequence = 0 ; + + + KeyValueGenerator() { + keyRandomizer = new Random(0L); //TODO with seed zero + valueRandomizer = new Random(1L); //TODO with seed one + } + + // Key is always random now. + void getKey(byte[] key) { + keyRandomizer.nextBytes(key); + } + + void getValue(byte[] value) { + if (valueSequence % randomValueRatio == 0) + valueRandomizer.nextBytes(value); + valueSequence++; + } + } + + /** + * + * @param fileType "HFile" or "SequenceFile" + * @param keyLength + * @param valueLength + * @param codecName "none", "lzo", "gz" + * @param rows number of rows to be written. + * @param writeMethod used for HFile only. + * @param minBlockSize used for HFile only. + * @throws IOException + */ + //TODO writeMethod: implement multiple ways of writing e.g. A) known length (no chunk) B) using a buffer and streaming (for many chunks). + public void timeWrite(String fileType, int keyLength, int valueLength, + String codecName, long rows, String writeMethod, int minBlockSize) + throws IOException { + System.out.println("File Type: " + fileType); + System.out.println("Writing " + fileType + " with codecName: " + codecName); + long totalBytesWritten = 0; + + + //Using separate randomizer for key/value with seeds matching Sequence File. + byte[] key = new byte[keyLength]; + byte[] value = new byte[valueLength]; + KeyValueGenerator generator = new KeyValueGenerator(); + + startTime(); + + Path path = new Path(ROOT_DIR, fileType + ".Performance"); + System.out.println(ROOT_DIR + path.getName()); + FSDataOutputStream fout = createFSOutput(path); + + if ("HFile".equals(fileType)){ + System.out.println("HFile write method: "); + HFile.Writer writer = + new HFile.Writer(fout, minBlockSize, codecName, null); + + // Writing value in one shot. + for (long l=0 ; l
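
[Editor's addendum, not part of the committed patch.] For readers skimming this commit, the snippet below is a minimal sketch of the write/read round trip that TestHFile.basicWithSomeCodec exercises, using only the HFile API calls visible in the tests above (HFile.Writer, append, HFile.Reader, loadFileInfo, getScanner, seekTo, getKey/getValue, next). The class name, output path, 512-byte block size, "none" codec name and record count are illustrative assumptions, not values required by HFile.

import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;

public class HFileRoundTripSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/example.hfile");   // illustrative location

    // Write a handful of key/value pairs, keys in ascending order as in writeSomeRecords().
    FSDataOutputStream out = fs.create(path);
    HFile.Writer writer = new HFile.Writer(out, 512, "none", null);
    for (int i = 0; i < 10; i++) {
      String key = String.format("%010d", i);
      writer.append(Bytes.toBytes(key), Bytes.toBytes("value" + key));
    }
    writer.close();
    out.close();

    // Read them back: load the file info/index, then scan from the first key.
    HFile.Reader reader = new HFile.Reader(fs.open(path),
        fs.getFileStatus(path).getLen(), null);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner();
    if (scanner.seekTo()) {   // false would mean no data, as asserted in metablocks() above
      do {
        ByteBuffer k = scanner.getKey();
        ByteBuffer v = scanner.getValue();
        System.out.println(Bytes.toString(Bytes.toBytes(k)) + " -> "
            + Bytes.toString(Bytes.toBytes(v)));
      } while (scanner.next());
    }
    reader.close();
    fs.delete(path, true);
  }
}

As in the tests above, the Writer's fourth argument and the Reader's third argument are passed as null here; testComparator shows the Writer argument being supplied with a custom RawComparator instead.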