Subject: svn commit: r793163 [4/4] - in /hadoop/common/branches/branch-0.20: ./ src/core/org/apache/hadoop/io/file/ src/core/org/apache/hadoop/io/file/tfile/ src/test/ src/test/org/apache/hadoop/io/file/ src/test/org/apache/hadoop/io/file/tfile/ Date: Sat, 11 Jul 2009 01:22:15 -0000 To: common-commits@hadoop.apache.org From: cdouglas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090711012216.EC7722388905@eris.apache.org> Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsByteArrays.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; + +public class TestTFileNoneCodecsByteArrays extends TestTFileByteArrays { + /** + * Test non-compression codec, using the same test cases as in the ByteArrays. + */ + @Override + public void setUp() throws IOException { + init(Compression.Algorithm.NONE.getName(), "memcmp", "TFileTestCodecsNone", + 24, 24); + super.setUp(); + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsJClassComparatorByteArrays.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; + +import org.apache.hadoop.io.RawComparator; +import org.apache.hadoop.io.WritableComparator; + +/** + * + * Byte arrays test case class using GZ compression codec, base class of none + * and LZO compression classes. + * + */ + +public class TestTFileNoneCodecsJClassComparatorByteArrays extends TestTFileByteArrays { + /** + * Test non-compression codec, using the same test cases as in the ByteArrays. + */ + @Override + public void setUp() throws IOException { + init(Compression.Algorithm.NONE.getName(), + "jclass: org.apache.hadoop.io.file.tfile.MyComparator", + "TestTFileNoneCodecsJClassComparatorByteArrays", 24, 24); + super.setUp(); + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileNoneCodecsStreams.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; + +public class TestTFileNoneCodecsStreams extends TestTFileStreams { + /** + * Test non-compression codec, using the same test cases as in the ByteArrays. + */ + @Override + public void setUp() throws IOException { + init(Compression.Algorithm.NONE.getName(), "memcmp", "TFileTestCodecsNone"); + super.setUp(); + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeek.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,504 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; +import java.util.Random; +import java.util.StringTokenizer; + +import junit.framework.TestCase; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG; +import org.apache.hadoop.io.file.tfile.TFile.Reader; +import org.apache.hadoop.io.file.tfile.TFile.Writer; +import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner; + +/** + * test the performance for seek. 
+ * + */ +public class TestTFileSeek extends TestCase { + private MyOptions options; + private Configuration conf; + private Path path; + private FileSystem fs; + private NanoTimer timer; + private Random rng; + private DiscreteRNG keyLenGen; + private KVGenerator kvGen; + + @Override + public void setUp() throws IOException { + if (options == null) { + options = new MyOptions(new String[0]); + } + + conf = new Configuration(); + conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize); + conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize); + path = new Path(new Path(options.rootDir), options.file); + fs = path.getFileSystem(conf); + timer = new NanoTimer(false); + rng = new Random(options.seed); + keyLenGen = + new RandomDistribution.Zipf(new Random(rng.nextLong()), + options.minKeyLen, options.maxKeyLen, 1.2); + DiscreteRNG valLenGen = + new RandomDistribution.Flat(new Random(rng.nextLong()), + options.minValLength, options.maxValLength); + DiscreteRNG wordLenGen = + new RandomDistribution.Flat(new Random(rng.nextLong()), + options.minWordLen, options.maxWordLen); + kvGen = + new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen, + options.dictSize); + } + + @Override + public void tearDown() throws IOException { + fs.delete(path, true); + } + + private static FSDataOutputStream createFSOutput(Path name, FileSystem fs) + throws IOException { + if (fs.exists(name)) { + fs.delete(name, true); + } + FSDataOutputStream fout = fs.create(name); + return fout; + } + + private void createTFile() throws IOException { + long totalBytes = 0; + FSDataOutputStream fout = createFSOutput(path, fs); + try { + Writer writer = + new Writer(fout, options.minBlockSize, options.compress, "memcmp", + conf); + try { + BytesWritable key = new BytesWritable(); + BytesWritable val = new BytesWritable(); + timer.start(); + for (long i = 0; true; ++i) { + if (i % 1000 == 0) { // test the size for every 1000 rows. + if (fs.getFileStatus(path).getLen() >= options.fileSize) { + break; + } + } + kvGen.next(key, val, false); + writer.append(key.get(), 0, key.getSize(), val.get(), 0, val + .getSize()); + totalBytes += key.getSize(); + totalBytes += val.getSize(); + } + timer.stop(); + } + finally { + writer.close(); + } + } + finally { + fout.close(); + } + double duration = (double)timer.read()/1000; // in us. + long fsize = fs.getFileStatus(path).getLen(); + + System.out.printf( + "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n", + timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes + / duration); + System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n", + timer.toString(), (double) fsize / 1024 / 1024, fsize / duration); + } + + public void seekTFile() throws IOException { + int miss = 0; + long totalBytes = 0; + FSDataInputStream fsdis = fs.open(path); + Reader reader = + new Reader(fsdis, fs.getFileStatus(path).getLen(), conf); + KeySampler kSampler = + new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(), + keyLenGen); + Scanner scanner = reader.createScanner(); + BytesWritable key = new BytesWritable(); + BytesWritable val = new BytesWritable(); + timer.reset(); + timer.start(); + for (int i = 0; i < options.seekCount; ++i) { + kSampler.next(key); + scanner.lowerBound(key.get(), 0, key.getSize()); + if (!scanner.atEnd()) { + scanner.entry().get(key, val); + totalBytes += key.getSize(); + totalBytes += val.getSize(); + } + else { + ++miss; + } + } + timer.stop(); + double duration = (double) timer.read() / 1000; // in us. 
+ System.out.printf( + "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n", + timer.toString(), NanoTimer.nanoTimeToString(timer.read() + / options.seekCount), options.seekCount - miss, miss, + (double) totalBytes / 1024 / (options.seekCount - miss)); + + } + + public void testSeeks() throws IOException { + String[] supported = TFile.getSupportedCompressionAlgorithms(); + boolean proceed = false; + for (String c : supported) { + if (c.equals(options.compress)) { + proceed = true; + break; + } + } + + if (!proceed) { + System.out.println("Skipped for " + options.compress); + return; + } + + if (options.doCreate()) { + createTFile(); + } + + if (options.doRead()) { + seekTFile(); + } + } + + private static class IntegerRange { + private final int from, to; + + public IntegerRange(int from, int to) { + this.from = from; + this.to = to; + } + + public static IntegerRange parse(String s) throws ParseException { + StringTokenizer st = new StringTokenizer(s, " \t,"); + if (st.countTokens() != 2) { + throw new ParseException("Bad integer specification: " + s); + } + int from = Integer.parseInt(st.nextToken()); + int to = Integer.parseInt(st.nextToken()); + return new IntegerRange(from, to); + } + + public int from() { + return from; + } + + public int to() { + return to; + } + } + + private static class MyOptions { + // hard coded constants + int dictSize = 1000; + int minWordLen = 5; + int maxWordLen = 20; + int osInputBufferSize = 64 * 1024; + int osOutputBufferSize = 64 * 1024; + int fsInputBufferSizeNone = 0; + int fsInputBufferSizeLzo = 0; + int fsInputBufferSizeGz = 0; + int fsOutputBufferSizeNone = 1; + int fsOutputBufferSizeLzo = 1; + int fsOutputBufferSizeGz = 1; + + String rootDir = + System.getProperty("test.build.data", "/tmp/tfile-test"); + String file = "TestTFileSeek"; + String compress = "gz"; + int minKeyLen = 10; + int maxKeyLen = 50; + int minValLength = 100; + int maxValLength = 200; + int minBlockSize = 64 * 1024; + int fsOutputBufferSize = 1; + int fsInputBufferSize = 0; + long fileSize = 3 * 1024 * 1024; + long seekCount = 1000; + long seed; + + static final int OP_CREATE = 1; + static final int OP_READ = 2; + int op = OP_CREATE | OP_READ; + + boolean proceed = false; + + public MyOptions(String[] args) { + seed = System.nanoTime(); + + try { + Options opts = buildOptions(); + CommandLineParser parser = new GnuParser(); + CommandLine line = parser.parse(opts, args, true); + processOptions(line, opts); + validateOptions(); + } + catch (ParseException e) { + System.out.println(e.getMessage()); + System.out.println("Try \"--help\" option for details."); + setStopProceed(); + } + } + + public boolean proceed() { + return proceed; + } + + private Options buildOptions() { + Option compress = + OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]") + .hasArg().withDescription("compression scheme").create('c'); + + Option fileSize = + OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB") + .hasArg().withDescription("target size of the file (in MB).") + .create('s'); + + Option fsInputBufferSz = + OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size") + .hasArg().withDescription( + "size of the file system input buffer (in bytes).").create( + 'i'); + + Option fsOutputBufferSize = + OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size") + .hasArg().withDescription( + "size of the file system output buffer (in bytes).").create( + 'o'); + + Option keyLen = + OptionBuilder + .withLongOpt("key-length") + .withArgName("min,max") 
+ .hasArg() + .withDescription( + "the length range of the key (in bytes)") + .create('k'); + + Option valueLen = + OptionBuilder + .withLongOpt("value-length") + .withArgName("min,max") + .hasArg() + .withDescription( + "the length range of the value (in bytes)") + .create('v'); + + Option blockSz = + OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg() + .withDescription("minimum block size (in KB)").create('b'); + + Option seed = + OptionBuilder.withLongOpt("seed").withArgName("long-int").hasArg() + .withDescription("specify the seed").create('S'); + + Option operation = + OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg() + .withDescription( + "action: seek-only, create-only, seek-after-create").create( + 'x'); + + Option rootDir = + OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg() + .withDescription( + "specify root directory where files will be created.") + .create('r'); + + Option file = + OptionBuilder.withLongOpt("file").withArgName("name").hasArg() + .withDescription("specify the file name to be created or read.") + .create('f'); + + Option seekCount = + OptionBuilder + .withLongOpt("seek") + .withArgName("count") + .hasArg() + .withDescription( + "specify how many seek operations we perform (requires -x r or -x rw.") + .create('n'); + + Option help = + OptionBuilder.withLongOpt("help").hasArg(false).withDescription( + "show this screen").create("h"); + + return new Options().addOption(compress).addOption(fileSize).addOption( + fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen) + .addOption(blockSz).addOption(rootDir).addOption(valueLen).addOption( + operation).addOption(seekCount).addOption(file).addOption(help); + + } + + private void processOptions(CommandLine line, Options opts) + throws ParseException { + // --help -h and --version -V must be processed first. + if (line.hasOption('h')) { + HelpFormatter formatter = new HelpFormatter(); + System.out.println("TFile and SeqFile benchmark."); + System.out.println(); + formatter.printHelp(100, + "java ... 
TestTFileSeqFileComparison [options]", + "\nSupported options:", opts, ""); + return; + } + + if (line.hasOption('c')) { + compress = line.getOptionValue('c'); + } + + if (line.hasOption('d')) { + dictSize = Integer.parseInt(line.getOptionValue('d')); + } + + if (line.hasOption('s')) { + fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024; + } + + if (line.hasOption('i')) { + fsInputBufferSize = Integer.parseInt(line.getOptionValue('i')); + } + + if (line.hasOption('o')) { + fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o')); + } + + if (line.hasOption('n')) { + seekCount = Integer.parseInt(line.getOptionValue('n')); + } + + if (line.hasOption('k')) { + IntegerRange ir = IntegerRange.parse(line.getOptionValue('k')); + minKeyLen = ir.from(); + maxKeyLen = ir.to(); + } + + if (line.hasOption('v')) { + IntegerRange ir = IntegerRange.parse(line.getOptionValue('v')); + minValLength = ir.from(); + maxValLength = ir.to(); + } + + if (line.hasOption('b')) { + minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024; + } + + if (line.hasOption('r')) { + rootDir = line.getOptionValue('r'); + } + + if (line.hasOption('f')) { + file = line.getOptionValue('f'); + } + + if (line.hasOption('S')) { + seed = Long.parseLong(line.getOptionValue('S')); + } + + if (line.hasOption('x')) { + String strOp = line.getOptionValue('x'); + if (strOp.equals("r")) { + op = OP_READ; + } + else if (strOp.equals("w")) { + op = OP_CREATE; + } + else if (strOp.equals("rw")) { + op = OP_CREATE | OP_READ; + } + else { + throw new ParseException("Unknown action specifier: " + strOp); + } + } + + proceed = true; + } + + private void validateOptions() throws ParseException { + if (!compress.equals("none") && !compress.equals("lzo") + && !compress.equals("gz")) { + throw new ParseException("Unknown compression scheme: " + compress); + } + + if (minKeyLen >= maxKeyLen) { + throw new ParseException( + "Max key length must be greater than min key length."); + } + + if (minValLength >= maxValLength) { + throw new ParseException( + "Max value length must be greater than min value length."); + } + + if (minWordLen >= maxWordLen) { + throw new ParseException( + "Max word length must be greater than min word length."); + } + return; + } + + private void setStopProceed() { + proceed = false; + } + + public boolean doCreate() { + return (op & OP_CREATE) != 0; + } + + public boolean doRead() { + return (op & OP_READ) != 0; + } + } + + public static void main(String[] argv) throws IOException { + TestTFileSeek testCase = new TestTFileSeek(); + MyOptions options = new MyOptions(argv); + + if (options.proceed == false) { + return; + } + + testCase.options = options; + testCase.setUp(); + testCase.testSeeks(); + testCase.tearDown(); + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSeqFileComparison.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,782 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Random; +import java.util.StringTokenizer; + +import junit.framework.TestCase; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner.Entry; + +public class TestTFileSeqFileComparison extends TestCase { + MyOptions options; + + private FileSystem fs; + private Configuration conf; + private long startTimeEpoch; + private long finishTimeEpoch; + private DateFormat formatter; + byte[][] dictionary; + + @Override + public void setUp() throws IOException { + if (options == null) { + options = new MyOptions(new String[0]); + } + + conf = new Configuration(); + conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize); + conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize); + Path path = new Path(options.rootDir); + fs = path.getFileSystem(conf); + formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + setUpDictionary(); + } + + private void setUpDictionary() { + Random rng = new Random(); + dictionary = new byte[options.dictSize][]; + for (int i = 0; i < options.dictSize; ++i) { + int len = + rng.nextInt(options.maxWordLen - options.minWordLen) + + options.minWordLen; + dictionary[i] = new byte[len]; + rng.nextBytes(dictionary[i]); + } + } + + @Override + public void tearDown() throws IOException { + // do nothing + } + + public void startTime() throws IOException { + startTimeEpoch = System.currentTimeMillis(); + System.out.println(formatTime() + " Started timing."); + } + + public void stopTime() throws IOException { + finishTimeEpoch = System.currentTimeMillis(); + System.out.println(formatTime() + " Stopped timing."); + } + + public long getIntervalMillis() throws IOException { + return finishTimeEpoch - startTimeEpoch; + } + + public void printlnWithTimestamp(String message) throws IOException { + System.out.println(formatTime() + " " + message); + } + + /* + * Format millis into minutes and seconds. 
+ */ + public String formatTime(long milis) { + return formatter.format(milis); + } + + public String formatTime() { + return formatTime(System.currentTimeMillis()); + } + + private interface KVAppendable { + public void append(BytesWritable key, BytesWritable value) + throws IOException; + + public void close() throws IOException; + } + + private interface KVReadable { + public byte[] getKey(); + + public byte[] getValue(); + + public int getKeyLength(); + + public int getValueLength(); + + public boolean next() throws IOException; + + public void close() throws IOException; + } + + static class TFileAppendable implements KVAppendable { + private FSDataOutputStream fsdos; + private TFile.Writer writer; + + public TFileAppendable(FileSystem fs, Path path, String compress, + int minBlkSize, int osBufferSize, Configuration conf) + throws IOException { + this.fsdos = fs.create(path, true, osBufferSize); + this.writer = new TFile.Writer(fsdos, minBlkSize, compress, null, conf); + } + + public void append(BytesWritable key, BytesWritable value) + throws IOException { + writer.append(key.get(), 0, key.getSize(), value.get(), 0, value + .getSize()); + } + + public void close() throws IOException { + writer.close(); + fsdos.close(); + } + } + + static class TFileReadable implements KVReadable { + private FSDataInputStream fsdis; + private TFile.Reader reader; + private TFile.Reader.Scanner scanner; + private byte[] keyBuffer; + private int keyLength; + private byte[] valueBuffer; + private int valueLength; + + public TFileReadable(FileSystem fs, Path path, int osBufferSize, + Configuration conf) throws IOException { + this.fsdis = fs.open(path, osBufferSize); + this.reader = + new TFile.Reader(fsdis, fs.getFileStatus(path).getLen(), conf); + this.scanner = reader.createScanner(); + keyBuffer = new byte[32]; + valueBuffer = new byte[32]; + } + + private void checkKeyBuffer(int size) { + if (size <= keyBuffer.length) { + return; + } + keyBuffer = + new byte[Math.max(2 * keyBuffer.length, 2 * size - keyBuffer.length)]; + } + + private void checkValueBuffer(int size) { + if (size <= valueBuffer.length) { + return; + } + valueBuffer = + new byte[Math.max(2 * valueBuffer.length, 2 * size + - valueBuffer.length)]; + } + + public byte[] getKey() { + return keyBuffer; + } + + public int getKeyLength() { + return keyLength; + } + + public byte[] getValue() { + return valueBuffer; + } + + public int getValueLength() { + return valueLength; + } + + public boolean next() throws IOException { + if (scanner.atEnd()) return false; + Entry entry = scanner.entry(); + keyLength = entry.getKeyLength(); + checkKeyBuffer(keyLength); + entry.getKey(keyBuffer); + valueLength = entry.getValueLength(); + checkValueBuffer(valueLength); + entry.getValue(valueBuffer); + scanner.advance(); + return true; + } + + public void close() throws IOException { + scanner.close(); + reader.close(); + fsdis.close(); + } + } + + static class SeqFileAppendable implements KVAppendable { + private FSDataOutputStream fsdos; + private SequenceFile.Writer writer; + + public SeqFileAppendable(FileSystem fs, Path path, int osBufferSize, + String compress, int minBlkSize) throws IOException { + Configuration conf = new Configuration(); + conf.setBoolean("hadoop.native.lib", true); + + CompressionCodec codec = null; + if ("lzo".equals(compress)) { + codec = Compression.Algorithm.LZO.getCodec(); + } + else if ("gz".equals(compress)) { + codec = Compression.Algorithm.GZ.getCodec(); + } + else if (!"none".equals(compress)) + throw new 
IOException("Codec not supported."); + + this.fsdos = fs.create(path, true, osBufferSize); + + if (!"none".equals(compress)) { + writer = + SequenceFile.createWriter(conf, fsdos, BytesWritable.class, + BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec); + } + else { + writer = + SequenceFile.createWriter(conf, fsdos, BytesWritable.class, + BytesWritable.class, SequenceFile.CompressionType.NONE, null); + } + } + + public void append(BytesWritable key, BytesWritable value) + throws IOException { + writer.append(key, value); + } + + public void close() throws IOException { + writer.close(); + fsdos.close(); + } + } + + static class SeqFileReadable implements KVReadable { + private SequenceFile.Reader reader; + private BytesWritable key; + private BytesWritable value; + + public SeqFileReadable(FileSystem fs, Path path, int osBufferSize) + throws IOException { + Configuration conf = new Configuration(); + conf.setInt("io.file.buffer.size", osBufferSize); + reader = new SequenceFile.Reader(fs, path, conf); + key = new BytesWritable(); + value = new BytesWritable(); + } + + public byte[] getKey() { + return key.get(); + } + + public int getKeyLength() { + return key.getSize(); + } + + public byte[] getValue() { + return value.get(); + } + + public int getValueLength() { + return value.getSize(); + } + + public boolean next() throws IOException { + return reader.next(key, value); + } + + public void close() throws IOException { + reader.close(); + } + } + + private void reportStats(Path path, long totalBytes) throws IOException { + long duration = getIntervalMillis(); + long fsize = fs.getFileStatus(path).getLen(); + printlnWithTimestamp(String.format( + "Duration: %dms...total size: %.2fMB...raw thrpt: %.2fMB/s", duration, + (double) totalBytes / 1024 / 1024, (double) totalBytes / duration + * 1000 / 1024 / 1024)); + printlnWithTimestamp(String.format( + "Compressed size: %.2fMB...compressed thrpt: %.2fMB/s.", + (double) fsize / 1024 / 1024, (double) fsize / duration * 1000 / 1024 + / 1024)); + } + + private void fillBuffer(Random rng, BytesWritable bw, byte[] tmp, int len) { + int n = 0; + while (n < len) { + byte[] word = dictionary[rng.nextInt(dictionary.length)]; + int l = Math.min(word.length, len - n); + System.arraycopy(word, 0, tmp, n, l); + n += l; + } + bw.set(tmp, 0, len); + } + + private void timeWrite(Path path, KVAppendable appendable, int baseKlen, + int baseVlen, long fileSize) throws IOException { + int maxKlen = baseKlen * 2; + int maxVlen = baseVlen * 2; + BytesWritable key = new BytesWritable(); + BytesWritable value = new BytesWritable(); + byte[] keyBuffer = new byte[maxKlen]; + byte[] valueBuffer = new byte[maxVlen]; + Random rng = new Random(options.seed); + long totalBytes = 0; + printlnWithTimestamp("Start writing: " + path.getName() + "..."); + startTime(); + + for (long i = 0; true; ++i) { + if (i % 1000 == 0) { // test the size for every 1000 rows. 
+ if (fs.getFileStatus(path).getLen() >= fileSize) { + break; + } + } + int klen = rng.nextInt(baseKlen) + baseKlen; + int vlen = rng.nextInt(baseVlen) + baseVlen; + fillBuffer(rng, key, keyBuffer, klen); + fillBuffer(rng, value, valueBuffer, vlen); + key.set(keyBuffer, 0, klen); + value.set(valueBuffer, 0, vlen); + appendable.append(key, value); + totalBytes += klen; + totalBytes += vlen; + } + stopTime(); + appendable.close(); + reportStats(path, totalBytes); + } + + private void timeRead(Path path, KVReadable readable) throws IOException { + printlnWithTimestamp("Start reading: " + path.getName() + "..."); + long totalBytes = 0; + startTime(); + for (; readable.next();) { + totalBytes += readable.getKeyLength(); + totalBytes += readable.getValueLength(); + } + stopTime(); + readable.close(); + reportStats(path, totalBytes); + } + + private void createTFile(String parameters, String compress) + throws IOException { + System.out.println("=== TFile: Creation (" + parameters + ") === "); + Path path = new Path(options.rootDir, "TFile.Performance"); + KVAppendable appendable = + new TFileAppendable(fs, path, compress, options.minBlockSize, + options.osOutputBufferSize, conf); + timeWrite(path, appendable, options.keyLength, options.valueLength, + options.fileSize); + } + + private void readTFile(String parameters, boolean delFile) throws IOException { + System.out.println("=== TFile: Reading (" + parameters + ") === "); + { + Path path = new Path(options.rootDir, "TFile.Performance"); + KVReadable readable = + new TFileReadable(fs, path, options.osInputBufferSize, conf); + timeRead(path, readable); + if (delFile) { + if (fs.exists(path)) { + fs.delete(path, true); + } + } + } + } + + private void createSeqFile(String parameters, String compress) + throws IOException { + System.out.println("=== SeqFile: Creation (" + parameters + ") === "); + Path path = new Path(options.rootDir, "SeqFile.Performance"); + KVAppendable appendable = + new SeqFileAppendable(fs, path, options.osOutputBufferSize, compress, + options.minBlockSize); + timeWrite(path, appendable, options.keyLength, options.valueLength, + options.fileSize); + } + + private void readSeqFile(String parameters, boolean delFile) + throws IOException { + System.out.println("=== SeqFile: Reading (" + parameters + ") === "); + Path path = new Path(options.rootDir, "SeqFile.Performance"); + KVReadable readable = + new SeqFileReadable(fs, path, options.osInputBufferSize); + timeRead(path, readable); + if (delFile) { + if (fs.exists(path)) { + fs.delete(path, true); + } + } + } + + private void compareRun(String compress) throws IOException { + String[] supported = TFile.getSupportedCompressionAlgorithms(); + boolean proceed = false; + for (String c : supported) { + if (c.equals(compress)) { + proceed = true; + break; + } + } + + if (!proceed) { + System.out.println("Skipped for " + compress); + return; + } + + options.compress = compress; + String parameters = parameters2String(options); + createSeqFile(parameters, compress); + readSeqFile(parameters, true); + createTFile(parameters, compress); + readTFile(parameters, true); + createTFile(parameters, compress); + readTFile(parameters, true); + createSeqFile(parameters, compress); + readSeqFile(parameters, true); + } + + public void testRunComparisons() throws IOException { + String[] compresses = new String[] { "none", "lzo", "gz" }; + for (String compress : compresses) { + if (compress.equals("none")) { + conf + .setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeNone); + 
conf.setInt("tfile.fs.output.buffer.size", + options.fsOutputBufferSizeNone); + } + else if (compress.equals("lzo")) { + conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeLzo); + conf.setInt("tfile.fs.output.buffer.size", + options.fsOutputBufferSizeLzo); + } + else { + conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSizeGz); + conf + .setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSizeGz); + } + compareRun(compress); + } + } + + private static String parameters2String(MyOptions options) { + return String + .format( + "KLEN: %d-%d... VLEN: %d-%d...MinBlkSize: %.2fKB...Target Size: %.2fMB...Compression: ...%s", + options.keyLength, options.keyLength * 2, options.valueLength, + options.valueLength * 2, (double) options.minBlockSize / 1024, + (double) options.fileSize / 1024 / 1024, options.compress); + } + + private static class MyOptions { + String rootDir = + System + .getProperty("test.build.data", "/tmp/tfile-test"); + String compress = "gz"; + String format = "tfile"; + int dictSize = 1000; + int minWordLen = 5; + int maxWordLen = 20; + int keyLength = 50; + int valueLength = 100; + int minBlockSize = 256 * 1024; + int fsOutputBufferSize = 1; + int fsInputBufferSize = 0; + // special variable only for unit testing. + int fsInputBufferSizeNone = 0; + int fsInputBufferSizeGz = 0; + int fsInputBufferSizeLzo = 0; + int fsOutputBufferSizeNone = 1; + int fsOutputBufferSizeGz = 1; + int fsOutputBufferSizeLzo = 1; + + // un-exposed parameters. + int osInputBufferSize = 64 * 1024; + int osOutputBufferSize = 64 * 1024; + + long fileSize = 3 * 1024 * 1024; + long seed; + + static final int OP_CREATE = 1; + static final int OP_READ = 2; + int op = OP_READ; + + boolean proceed = false; + + public MyOptions(String[] args) { + seed = System.nanoTime(); + + try { + Options opts = buildOptions(); + CommandLineParser parser = new GnuParser(); + CommandLine line = parser.parse(opts, args, true); + processOptions(line, opts); + validateOptions(); + } + catch (ParseException e) { + System.out.println(e.getMessage()); + System.out.println("Try \"--help\" option for details."); + setStopProceed(); + } + } + + public boolean proceed() { + return proceed; + } + + private Options buildOptions() { + Option compress = + OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz]") + .hasArg().withDescription("compression scheme").create('c'); + + Option ditSize = + OptionBuilder.withLongOpt("dict").withArgName("size").hasArg() + .withDescription("number of dictionary entries").create('d'); + + Option fileSize = + OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB") + .hasArg().withDescription("target size of the file (in MB).") + .create('s'); + + Option format = + OptionBuilder.withLongOpt("format").withArgName("[tfile|seqfile]") + .hasArg().withDescription("choose TFile or SeqFile").create('f'); + + Option fsInputBufferSz = + OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size") + .hasArg().withDescription( + "size of the file system input buffer (in bytes).").create( + 'i'); + + Option fsOutputBufferSize = + OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size") + .hasArg().withDescription( + "size of the file system output buffer (in bytes).").create( + 'o'); + + Option keyLen = + OptionBuilder + .withLongOpt("key-length") + .withArgName("length") + .hasArg() + .withDescription( + "base length of the key (in bytes), actual length varies in [base, 2*base)") + .create('k'); + + Option valueLen = + OptionBuilder + 
.withLongOpt("value-length") + .withArgName("length") + .hasArg() + .withDescription( + "base length of the value (in bytes), actual length varies in [base, 2*base)") + .create('v'); + + Option wordLen = + OptionBuilder.withLongOpt("word-length").withArgName("min,max") + .hasArg().withDescription( + "range of dictionary word length (in bytes)").create('w'); + + Option blockSz = + OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg() + .withDescription("minimum block size (in KB)").create('b'); + + Option seed = + OptionBuilder.withLongOpt("seed").withArgName("long-int").hasArg() + .withDescription("specify the seed").create('S'); + + Option operation = + OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg() + .withDescription( + "action: read-only, create-only, read-after-create").create( + 'x'); + + Option rootDir = + OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg() + .withDescription( + "specify root directory where files will be created.") + .create('r'); + + Option help = + OptionBuilder.withLongOpt("help").hasArg(false).withDescription( + "show this screen").create("h"); + + return new Options().addOption(compress).addOption(ditSize).addOption( + fileSize).addOption(format).addOption(fsInputBufferSz).addOption( + fsOutputBufferSize).addOption(keyLen).addOption(wordLen).addOption( + blockSz).addOption(rootDir).addOption(valueLen).addOption(operation) + .addOption(help); + + } + + private void processOptions(CommandLine line, Options opts) + throws ParseException { + // --help -h and --version -V must be processed first. + if (line.hasOption('h')) { + HelpFormatter formatter = new HelpFormatter(); + System.out.println("TFile and SeqFile benchmark."); + System.out.println(); + formatter.printHelp(100, + "java ... 
TestTFileSeqFileComparison [options]", + "\nSupported options:", opts, ""); + return; + } + + if (line.hasOption('c')) { + compress = line.getOptionValue('c'); + } + + if (line.hasOption('d')) { + dictSize = Integer.parseInt(line.getOptionValue('d')); + } + + if (line.hasOption('s')) { + fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024; + } + + if (line.hasOption('f')) { + format = line.getOptionValue('f'); + } + + if (line.hasOption('i')) { + fsInputBufferSize = Integer.parseInt(line.getOptionValue('i')); + } + + if (line.hasOption('o')) { + fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o')); + } + + if (line.hasOption('k')) { + keyLength = Integer.parseInt(line.getOptionValue('k')); + } + + if (line.hasOption('v')) { + valueLength = Integer.parseInt(line.getOptionValue('v')); + } + + if (line.hasOption('b')) { + minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024; + } + + if (line.hasOption('r')) { + rootDir = line.getOptionValue('r'); + } + + if (line.hasOption('S')) { + seed = Long.parseLong(line.getOptionValue('S')); + } + + if (line.hasOption('w')) { + String min_max = line.getOptionValue('w'); + StringTokenizer st = new StringTokenizer(min_max, " \t,"); + if (st.countTokens() != 2) { + throw new ParseException("Bad word length specification: " + min_max); + } + minWordLen = Integer.parseInt(st.nextToken()); + maxWordLen = Integer.parseInt(st.nextToken()); + } + + if (line.hasOption('x')) { + String strOp = line.getOptionValue('x'); + if (strOp.equals("r")) { + op = OP_READ; + } + else if (strOp.equals("w")) { + op = OP_CREATE; + } + else if (strOp.equals("rw")) { + op = OP_CREATE | OP_READ; + } + else { + throw new ParseException("Unknown action specifier: " + strOp); + } + } + + proceed = true; + } + + private void validateOptions() throws ParseException { + if (!compress.equals("none") && !compress.equals("lzo") + && !compress.equals("gz")) { + throw new ParseException("Unknown compression scheme: " + compress); + } + + if (!format.equals("tfile") && !format.equals("seqfile")) { + throw new ParseException("Unknown file format: " + format); + } + + if (minWordLen >= maxWordLen) { + throw new ParseException( + "Max word length must be greater than min word length."); + } + return; + } + + private void setStopProceed() { + proceed = false; + } + + public boolean doCreate() { + return (op & OP_CREATE) != 0; + } + + public boolean doRead() { + return (op & OP_READ) != 0; + } + } + + public static void main(String[] args) throws IOException { + TestTFileSeqFileComparison testCase = new TestTFileSeqFileComparison(); + MyOptions options = new MyOptions(args); + if (options.proceed == false) { + return; + } + testCase.options = options; + String parameters = parameters2String(options); + + testCase.setUp(); + if (testCase.options.format.equals("tfile")) { + if (options.doCreate()) { + testCase.createTFile(parameters, options.compress); + } + if (options.doRead()) { + testCase.readTFile(parameters, options.doCreate()); + } + } + else { + if (options.doCreate()) { + testCase.createSeqFile(parameters, options.compress); + } + if (options.doRead()) { + testCase.readSeqFile(parameters, options.doCreate()); + } + } + testCase.tearDown(); + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java?rev=793163&view=auto 
============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileSplit.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.file.tfile.TFile.Reader; +import org.apache.hadoop.io.file.tfile.TFile.Writer; +import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner; + +public class TestTFileSplit extends TestCase { + private static String ROOT = + System.getProperty("test.build.data", "/tmp/tfile-test"); + + private final static int BLOCK_SIZE = 64 * 1024; + + private static final String KEY = "key"; + private static final String VALUE = "value"; + + private FileSystem fs; + private Configuration conf; + private Path path; + + private String comparator = "memcmp"; + private String outputFile = "TestTFileSplit"; + + void createFile(int count, String compress) throws IOException { + conf = new Configuration(); + path = new Path(ROOT, outputFile + "." 
+ compress); + fs = path.getFileSystem(conf); + FSDataOutputStream out = fs.create(path); + Writer writer = new Writer(out, BLOCK_SIZE, compress, comparator, conf); + + int nx; + for (nx = 0; nx < count; nx++) { + byte[] key = composeSortedKey(KEY, count, nx).getBytes(); + byte[] value = (VALUE + nx).getBytes(); + writer.append(key, value); + } + writer.close(); + out.close(); + } + + void readFile() throws IOException { + long fileLength = fs.getFileStatus(path).getLen(); + int numSplit = 10; + long splitSize = fileLength / numSplit + 1; + + Reader reader = + new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf); + long offset = 0; + long rowCount = 0; + BytesWritable key, value; + for (int i = 0; i < numSplit; ++i, offset += splitSize) { + Scanner scanner = reader.createScanner(offset, splitSize); + int count = 0; + key = new BytesWritable(); + value = new BytesWritable(); + while (!scanner.atEnd()) { + scanner.entry().get(key, value); + ++count; + scanner.advance(); + } + scanner.close(); + Assert.assertTrue(count > 0); + rowCount += count; + } + Assert.assertEquals(rowCount, reader.getEntryCount()); + reader.close(); + } + + static String composeSortedKey(String prefix, int total, int value) { + return String.format("%s%010d", prefix, value); + } + + public void testSplit() throws IOException { + System.out.println("testSplit"); + createFile(100000, Compression.Algorithm.NONE.getName()); + readFile(); + fs.delete(path, true); + createFile(500000, Compression.Algorithm.GZ.getName()); + readFile(); + fs.delete(path, true); + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileStreams.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,423 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.DataOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.util.Random; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.io.file.tfile.TFile.Reader; +import org.apache.hadoop.io.file.tfile.TFile.Writer; +import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner; + +/** + * + * Streaming interfaces test case class using GZ compression codec, base class + * of none and LZO compression classes. + * + */ + +public class TestTFileStreams extends TestCase { + private static String ROOT = + System.getProperty("test.build.data", "/tmp/tfile-test"); + + private final static int BLOCK_SIZE = 512; + private final static int K = 1024; + private final static int M = K * K; + protected boolean skip = false; + private FileSystem fs; + private Configuration conf; + private Path path; + private FSDataOutputStream out; + Writer writer; + + private String compression = Compression.Algorithm.GZ.getName(); + private String comparator = "memcmp"; + private String outputFile = "TFileTestStreams"; + + public void init(String compression, String comparator, String outputFile) { + this.compression = compression; + this.comparator = comparator; + this.outputFile = outputFile; + } + + @Override + public void setUp() throws IOException { + conf = new Configuration(); + path = new Path(ROOT, outputFile); + fs = path.getFileSystem(conf); + out = fs.create(path); + writer = new Writer(out, BLOCK_SIZE, compression, comparator, conf); + } + + @Override + public void tearDown() throws IOException { + if (!skip) { + try { + closeOutput(); + } catch (Exception e) { + // no-op + } + fs.delete(path, true); + } + } + + public void testNoEntry() throws IOException { + if (skip) + return; + closeOutput(); + TestTFileByteArrays.readRecords(fs, path, 0, conf); + } + + public void testOneEntryKnownLength() throws IOException { + if (skip) + return; + writeRecords(1, true, true); + + TestTFileByteArrays.readRecords(fs, path, 1, conf); + } + + public void testOneEntryUnknownLength() throws IOException { + if (skip) + return; + writeRecords(1, false, false); + + // TODO: will throw exception at getValueLength, it's inconsistent though; + // getKeyLength returns a value correctly, though initial length is -1 + TestTFileByteArrays.readRecords(fs, path, 1, conf); + } + + // known key length, unknown value length + public void testOneEntryMixedLengths1() throws IOException { + if (skip) + return; + writeRecords(1, true, false); + + TestTFileByteArrays.readRecords(fs, path, 1, conf); + } + + // unknown key length, known value length + public void testOneEntryMixedLengths2() throws IOException { + if (skip) + return; + writeRecords(1, false, true); + + TestTFileByteArrays.readRecords(fs, path, 1, conf); + } + + public void testTwoEntriesKnownLength() throws IOException { + if (skip) + return; + writeRecords(2, true, true); + + TestTFileByteArrays.readRecords(fs, path, 2, conf); + } + + // Negative test + public void testFailureAddKeyWithoutValue() throws IOException { + if (skip) + return; + DataOutputStream dos = writer.prepareAppendKey(-1); + dos.write("key0".getBytes()); + try { + closeOutput(); + fail("Cannot add only a key without a value. 
"); + } + catch (IllegalStateException e) { + // noop, expecting an exception + } + } + + public void testFailureAddValueWithoutKey() throws IOException { + if (skip) + return; + DataOutputStream outValue = null; + try { + outValue = writer.prepareAppendValue(6); + outValue.write("value0".getBytes()); + fail("Cannot add a value without adding key first. "); + } + catch (Exception e) { + // noop, expecting an exception + } + finally { + if (outValue != null) { + outValue.close(); + } + } + } + + public void testFailureOneEntryKnownLength() throws IOException { + if (skip) + return; + DataOutputStream outKey = writer.prepareAppendKey(2); + try { + outKey.write("key0".getBytes()); + fail("Specified key length mismatched the actual key length."); + } + catch (IOException e) { + // noop, expecting an exception + } + + DataOutputStream outValue = null; + try { + outValue = writer.prepareAppendValue(6); + outValue.write("value0".getBytes()); + } + catch (Exception e) { + // noop, expecting an exception + } + } + + public void testFailureKeyTooLong() throws IOException { + if (skip) + return; + DataOutputStream outKey = writer.prepareAppendKey(2); + try { + outKey.write("key0".getBytes()); + outKey.close(); + Assert.fail("Key is longer than requested."); + } + catch (Exception e) { + // noop, expecting an exception + } + finally { + } + } + + public void testFailureKeyTooShort() throws IOException { + if (skip) + return; + DataOutputStream outKey = writer.prepareAppendKey(4); + outKey.write("key0".getBytes()); + outKey.close(); + DataOutputStream outValue = writer.prepareAppendValue(15); + try { + outValue.write("value0".getBytes()); + outValue.close(); + Assert.fail("Value is shorter than expected."); + } + catch (Exception e) { + // noop, expecting an exception + } + finally { + } + } + + public void testFailureValueTooLong() throws IOException { + if (skip) + return; + DataOutputStream outKey = writer.prepareAppendKey(4); + outKey.write("key0".getBytes()); + outKey.close(); + DataOutputStream outValue = writer.prepareAppendValue(3); + try { + outValue.write("value0".getBytes()); + outValue.close(); + Assert.fail("Value is longer than expected."); + } + catch (Exception e) { + // noop, expecting an exception + } + + try { + outKey.close(); + outKey.close(); + } + catch (Exception e) { + Assert.fail("Second or more close() should have no effect."); + } + } + + public void testFailureValueTooShort() throws IOException { + if (skip) + return; + DataOutputStream outKey = writer.prepareAppendKey(8); + try { + outKey.write("key0".getBytes()); + outKey.close(); + Assert.fail("Key is shorter than expected."); + } + catch (Exception e) { + // noop, expecting an exception + } + finally { + } + } + + public void testFailureCloseKeyStreamManyTimesInWriter() throws IOException { + if (skip) + return; + DataOutputStream outKey = writer.prepareAppendKey(4); + try { + outKey.write("key0".getBytes()); + outKey.close(); + } + catch (Exception e) { + // noop, expecting an exception + } + finally { + try { + outKey.close(); + } + catch (Exception e) { + // no-op + } + } + outKey.close(); + outKey.close(); + Assert.assertTrue("Multiple close should have no effect.", true); + } + + public void testFailureKeyLongerThan64K() throws IOException { + if (skip) + return; + try { + DataOutputStream outKey = writer.prepareAppendKey(64 * K + 1); + Assert.fail("Failed to handle key longer than 64K."); + } + catch (IndexOutOfBoundsException e) { + // noop, expecting exceptions + } + closeOutput(); + } + + public void 
testFailureKeyLongerThan64K_2() throws IOException { + if (skip) + return; + DataOutputStream outKey = writer.prepareAppendKey(-1); + try { + byte[] buf = new byte[K]; + Random rand = new Random(); + for (int nx = 0; nx < K + 2; nx++) { + rand.nextBytes(buf); + outKey.write(buf); + } + outKey.close(); + Assert.fail("Failed to handle key longer than 64K."); + } + catch (EOFException e) { + // noop, expecting exceptions + } + finally { + try { + closeOutput(); + } + catch (Exception e) { + // no-op + } + } + } + + public void testFailureNegativeOffset() throws IOException { + if (skip) + return; + writeRecords(2, true, true); + + Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf); + Scanner scanner = reader.createScanner(); + byte[] buf = new byte[K]; + try { + scanner.entry().getKey(buf, -1); + Assert.fail("Failed to handle key negative offset."); + } + catch (Exception e) { + // noop, expecting exceptions + } + finally { + } + scanner.close(); + reader.close(); + } + + /** + * Verify that the compressed data size is less than raw data size. + * + * @throws IOException + */ + public void testFailureCompressionNotWorking() throws IOException { + if (skip) + return; + long rawDataSize = writeRecords(10000, false, false, false); + if (!compression.equalsIgnoreCase(Compression.Algorithm.NONE.getName())) { + Assert.assertTrue(out.getPos() < rawDataSize); + } + closeOutput(); + } + + public void testFailureCompressionNotWorking2() throws IOException { + if (skip) + return; + long rawDataSize = writeRecords(10000, true, true, false); + if (!compression.equalsIgnoreCase(Compression.Algorithm.NONE.getName())) { + Assert.assertTrue(out.getPos() < rawDataSize); + } + closeOutput(); + } + + private long writeRecords(int count, boolean knownKeyLength, + boolean knownValueLength, boolean close) throws IOException { + long rawDataSize = 0; + for (int nx = 0; nx < count; nx++) { + String key = TestTFileByteArrays.composeSortedKey("key", count, nx); + DataOutputStream outKey = + writer.prepareAppendKey(knownKeyLength ? key.length() : -1); + outKey.write(key.getBytes()); + outKey.close(); + String value = "value" + nx; + DataOutputStream outValue = + writer.prepareAppendValue(knownValueLength ? 
value.length() : -1); + outValue.write(value.getBytes()); + outValue.close(); + rawDataSize += + WritableUtils.getVIntSize(key.getBytes().length) + + key.getBytes().length + + WritableUtils.getVIntSize(value.getBytes().length) + + value.getBytes().length; + } + if (close) { + closeOutput(); + } + return rawDataSize; + } + + private long writeRecords(int count, boolean knownKeyLength, + boolean knownValueLength) throws IOException { + return writeRecords(count, knownKeyLength, knownValueLength, true); + } + + private void closeOutput() throws IOException { + if (writer != null) { + writer.close(); + writer = null; + } + if (out != null) { + out.close(); + out = null; + } + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.file.tfile.TFile.Reader; +import org.apache.hadoop.io.file.tfile.TFile.Writer; +import org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner; + +public class TestTFileUnsortedByteArrays extends TestCase { + private static String ROOT = + System.getProperty("test.build.data", "/tmp/tfile-test"); + + + private final static int BLOCK_SIZE = 512; + private final static int BUF_SIZE = 64; + + private FileSystem fs; + private Configuration conf; + private Path path; + private FSDataOutputStream out; + private Writer writer; + + private String compression = Compression.Algorithm.GZ.getName(); + private String outputFile = "TFileTestUnsorted"; + /* + * pre-sampled numbers of records in one block, based on the + * generated key and value strings + */ + private int records1stBlock = 4314; + private int records2ndBlock = 4108; + + public void init(String compression, String outputFile, + int numRecords1stBlock, int numRecords2ndBlock) { + this.compression = compression; + this.outputFile = outputFile; + this.records1stBlock = numRecords1stBlock; + this.records2ndBlock = numRecords2ndBlock; + } + + @Override + public void setUp() throws IOException { + conf = new Configuration(); + path = new Path(ROOT, outputFile); + fs = path.getFileSystem(conf); + out = fs.create(path); + writer = new Writer(out, BLOCK_SIZE, compression, null, conf); + writer.append("keyZ".getBytes(), "valueZ".getBytes()); + writer.append("keyM".getBytes(), "valueM".getBytes()); + writer.append("keyN".getBytes(), "valueN".getBytes()); + writer.append("keyA".getBytes(), "valueA".getBytes()); + closeOutput(); + } + + @Override + public void tearDown() throws IOException { + fs.delete(path, true); + } + + // creating a scanner with a key range must fail on an unsorted TFile + public void testFailureScannerWithKeys() throws IOException { + Reader reader = + new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf); + Assert.assertFalse(reader.isSorted()); + Assert.assertEquals((int) reader.getEntryCount(), 4); + + try { + Scanner scanner = + reader.createScanner("aaa".getBytes(), "zzz".getBytes()); + Assert + .fail("Failed to catch creating a scanner with keys on an unsorted file."); + } + catch (RuntimeException e) { + } + finally { + reader.close(); + } + } + + // we still can scan records in an unsorted TFile + public void testScan() throws IOException { + Reader reader = + new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf); + Assert.assertFalse(reader.isSorted()); + Assert.assertEquals((int) reader.getEntryCount(), 4); + + Scanner scanner = reader.createScanner(); + + try { + + // read key and value + byte[] kbuf = new byte[BUF_SIZE]; + int klen = scanner.entry().getKeyLength(); + scanner.entry().getKey(kbuf); + Assert.assertEquals(new String(kbuf, 0, klen), "keyZ"); + + byte[] vbuf = new byte[BUF_SIZE]; + int vlen = scanner.entry().getValueLength(); + scanner.entry().getValue(vbuf); + Assert.assertEquals(new String(vbuf, 0, vlen), "valueZ"); + + scanner.advance(); + + // now try get value first + vbuf = new byte[BUF_SIZE]; + vlen = scanner.entry().getValueLength(); + scanner.entry().getValue(vbuf); + Assert.assertEquals(new String(vbuf, 0, vlen), "valueM"); + + kbuf = new
byte[BUF_SIZE]; + klen = scanner.entry().getKeyLength(); + scanner.entry().getKey(kbuf); + Assert.assertEquals(new String(kbuf, 0, klen), "keyM"); + } + finally { + scanner.close(); + reader.close(); + } + } + + // we still can scan records in an unsorted TFile + public void testScanRange() throws IOException { + Reader reader = + new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf); + Assert.assertFalse(reader.isSorted()); + Assert.assertEquals((int) reader.getEntryCount(), 4); + + Scanner scanner = reader.createScanner(); + + try { + + // read key and value + byte[] kbuf = new byte[BUF_SIZE]; + int klen = scanner.entry().getKeyLength(); + scanner.entry().getKey(kbuf); + Assert.assertEquals(new String(kbuf, 0, klen), "keyZ"); + + byte[] vbuf = new byte[BUF_SIZE]; + int vlen = scanner.entry().getValueLength(); + scanner.entry().getValue(vbuf); + Assert.assertEquals(new String(vbuf, 0, vlen), "valueZ"); + + scanner.advance(); + + // now try get value first + vbuf = new byte[BUF_SIZE]; + vlen = scanner.entry().getValueLength(); + scanner.entry().getValue(vbuf); + Assert.assertEquals(new String(vbuf, 0, vlen), "valueM"); + + kbuf = new byte[BUF_SIZE]; + klen = scanner.entry().getKeyLength(); + scanner.entry().getKey(kbuf); + Assert.assertEquals(new String(kbuf, 0, klen), "keyM"); + } + finally { + scanner.close(); + reader.close(); + } + } + + public void testFailureSeek() throws IOException { + Reader reader = + new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf); + Scanner scanner = reader.createScanner(); + + try { + // can't find ceil + try { + scanner.lowerBound("keyN".getBytes()); + Assert.fail("Cannot search in an unsorted TFile!"); + } + catch (Exception e) { + // noop, expecting exceptions + } + finally { + } + + // can't find higher + try { + scanner.upperBound("keyA".getBytes()); + Assert.fail("Cannot search higher in an unsorted TFile!"); + } + catch (Exception e) { + // noop, expecting exceptions + } + finally { + } + + // can't seek + try { + scanner.seekTo("keyM".getBytes()); + Assert.fail("Cannot seek in an unsorted TFile!"); + } + catch (Exception e) { + // noop, expecting exceptions + } + finally { + } + } + finally { + scanner.close(); + reader.close(); + } + } + + private void closeOutput() throws IOException { + if (writer != null) { + writer.close(); + writer = null; + out.close(); + out = null; + } + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/TestVLong.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; +import java.util.Random; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class TestVLong extends TestCase { + private static String ROOT = + System.getProperty("test.build.data", "/tmp/tfile-test"); + private Configuration conf; + private FileSystem fs; + private Path path; + private String outputFile = "TestVLong"; + + @Override + public void setUp() throws IOException { + conf = new Configuration(); + path = new Path(ROOT, outputFile); + fs = path.getFileSystem(conf); + if (fs.exists(path)) { + fs.delete(path, false); + } + } + + @Override + public void tearDown() throws IOException { + if (fs.exists(path)) { + fs.delete(path, false); + } + } + + public void testVLongByte() throws IOException { + FSDataOutputStream out = fs.create(path); + for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; ++i) { + Utils.writeVLong(out, i); + } + out.close(); + Assert.assertEquals("Incorrect encoded size", (1 << Byte.SIZE) + 96, fs + .getFileStatus( + path).getLen()); + + FSDataInputStream in = fs.open(path); + for (int i = Byte.MIN_VALUE; i <= Byte.MAX_VALUE; ++i) { + long n = Utils.readVLong(in); + Assert.assertEquals(n, i); + } + in.close(); + fs.delete(path, false); + } + + private long writeAndVerify(int shift) throws IOException { + FSDataOutputStream out = fs.create(path); + for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; ++i) { + Utils.writeVLong(out, ((long) i) << shift); + } + out.close(); + FSDataInputStream in = fs.open(path); + for (int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; ++i) { + long n = Utils.readVLong(in); + Assert.assertEquals(n, ((long) i) << shift); + } + in.close(); + long ret = fs.getFileStatus(path).getLen(); + fs.delete(path, false); + return ret; + } + + public void testVLongShort() throws IOException { + long size = writeAndVerify(0); + Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 2 + + ((1 << Byte.SIZE) - 40) + * (1 << Byte.SIZE) - 128 - 32, size); + } + + public void testVLong3Bytes() throws IOException { + long size = writeAndVerify(Byte.SIZE); + Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 3 + + ((1 << Byte.SIZE) - 32) * (1 << Byte.SIZE) - 40 - 1, size); + } + + public void testVLong4Bytes() throws IOException { + long size = writeAndVerify(Byte.SIZE * 2); + Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 4 + + ((1 << Byte.SIZE) - 16) * (1 << Byte.SIZE) - 32 - 2, size); + } + + public void testVLong5Bytes() throws IOException { + long size = writeAndVerify(Byte.SIZE * 3); + Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) * 6 - 256 + - 16 - 3, size); + } + + private void verifySixOrMoreBytes(int bytes) throws IOException { + long size = writeAndVerify(Byte.SIZE * (bytes - 2)); + Assert.assertEquals("Incorrect encoded size", (1 << Short.SIZE) + * (bytes 
+ 1) - 256 - bytes + 1, size); + } + public void testVLong6Bytes() throws IOException { + verifySixOrMoreBytes(6); + } + + public void testVLong7Bytes() throws IOException { + verifySixOrMoreBytes(7); + } + + public void testVLong8Bytes() throws IOException { + verifySixOrMoreBytes(8); + } + + public void testVLongRandom() throws IOException { + int count = 1024 * 1024; + long[] data = new long[count]; + Random rng = new Random(); + for (int i = 0; i < data.length; ++i) { + int shift = rng.nextInt(Long.SIZE) + 1; + long mask = (1L << shift) - 1; + long a = ((long) rng.nextInt()) << 32; + long b = rng.nextInt() & 0xffffffffL; + data[i] = (a + b) & mask; + } + + FSDataOutputStream out = fs.create(path); + for (int i = 0; i < data.length; ++i) { + Utils.writeVLong(out, data[i]); + } + out.close(); + + FSDataInputStream in = fs.open(path); + for (int i = 0; i < data.length; ++i) { + Assert.assertEquals(Utils.readVLong(in), data[i]); + } + in.close(); + fs.delete(path, false); + } +} Added: hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java?rev=793163&view=auto ============================================================================== --- hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java (added) +++ hadoop/common/branches/branch-0.20/src/test/org/apache/hadoop/io/file/tfile/Timer.java Sat Jul 11 01:22:14 2009 @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.io.file.tfile; + +import java.io.IOException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; + +/** + * A helper class to measure the + * wall-clock time taken + * for some event. + */ +public class Timer { + long startTimeEpoch; + long finishTimeEpoch; + private DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + public void startTime() throws IOException { + startTimeEpoch = System.currentTimeMillis(); + } + + public void stopTime() throws IOException { + finishTimeEpoch = System.currentTimeMillis(); + } + + public long getIntervalMillis() throws IOException { + return finishTimeEpoch - startTimeEpoch; + } + + public void printlnWithTimestamp(String message) throws IOException { + System.out.println(formatCurrentTime() + " " + message); + } + + public String formatTime(long millis) { + return formatter.format(millis); + } + + public String getIntervalString() throws IOException { + long time = getIntervalMillis(); + return formatTime(time); + } + + public String formatCurrentTime() { + return formatTime(System.currentTimeMillis()); + } + +} +
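For readers browsing this commit, a minimal usage sketch of the Timer helper together with the TFile Writer API exercised by the tests above may be useful. The sketch below is illustrative only and is not part of r793163; the class name, output path, and record count are invented for the example, and it is placed in the org.apache.hadoop.io.file.tfile package so that the Compression and Timer classes added here resolve.

package org.apache.hadoop.io.file.tfile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.file.tfile.TFile.Writer;

// Illustrative sketch only -- not part of this commit.
public class TFileWriteTimingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical output location; mirrors the test.build.data convention used by the tests.
    Path path = new Path(System.getProperty("test.build.data", "/tmp/tfile-test"), "TimedWrite");
    FileSystem fs = path.getFileSystem(conf);
    FSDataOutputStream out = fs.create(path);
    // Same Writer constructor the tests use: minimum block size, codec name,
    // comparator, configuration.
    Writer writer = new Writer(out, 512, Compression.Algorithm.GZ.getName(), "memcmp", conf);

    Timer timer = new Timer();
    timer.startTime();
    for (int i = 0; i < 10000; i++) {
      // Zero-padded keys keep the append order consistent with the memcmp comparator.
      String key = String.format("key%08d", i);
      writer.append(key.getBytes(), ("value" + i).getBytes());
    }
    writer.close();
    out.close();
    timer.stopTime();
    timer.printlnWithTimestamp("wrote 10000 records in " + timer.getIntervalMillis() + " ms");

    fs.delete(path, true);
  }
}

When a comparator is supplied, the Writer expects keys to be appended in comparator order, which is why the keys above are zero-padded; with a null comparator, as in TestTFileUnsortedByteArrays, keys may be appended in any order.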